Esempio n. 1
0
 def get_probability(self, img_list):
     """Run the model in inference mode on image paths or an image array.

     Args:
         img_list (str, list, tuple, ndarray): A single image path, a
             sequence of image paths, or an already prepared image array.
             Anything that is not a str/list/tuple is handed to the model
             unchanged (no loading, no preprocessing).

     Returns:
         The result depends on the kind of input:

         * array input, or fewer than ``batch_size`` paths: the softmax
           output of the model as an ndarray.
         * a single path: the argmax (axis 1) of the softmax output for
           that image.
         * ``batch_size`` or more paths: a list holding the argmax of the
           softmax output for every image, computed batch by batch.

     NOTE(review): the path branches return class indices while the
     fall-through returns probabilities. Given the method name this looks
     unintended, but it is preserved because callers may rely on it.
     """
     batch_size = 32
     self.set_models(inference=True)

     if isinstance(img_list, str):
         # Single path: load one image and add a leading batch axis.
         img_array = self.preprocess(load_img(img_list, self.imsize)[None])
         return np.argmax(rm.softmax(self(img_array)).as_ndarray(),
                          axis=1)[0]

     if isinstance(img_list, (tuple, list)):
         # Tuples are accepted here as well; the previous outer type check
         # only let lists through, so tuples of paths went to the model raw.
         paths = list(img_list)
         if len(paths) >= batch_size:
             test_dist = ImageDistributor(paths)
             n_batches = int(np.ceil(len(test_dist) / batch_size))
             results = []
             # The context manager closes the progress bar even on errors.
             with tqdm(total=n_batches) as bar:
                 for x_img_list, _ in test_dist.batch(batch_size,
                                                      shuffle=False):
                     img_array = np.vstack([
                         load_img(path, self.imsize)[None]
                         for path in x_img_list
                     ])
                     img_array = self.preprocess(img_array)
                     results.extend(
                         np.argmax(rm.softmax(self(img_array)).as_ndarray(),
                                   axis=1))
                     bar.update(1)
             return results
         img_array = np.vstack(
             [load_img(path, self.imsize)[None] for path in paths])
         img_array = self.preprocess(img_array)
     else:
         img_array = img_list
     return rm.softmax(self(img_array)).as_ndarray()
Esempio n. 2
0
 def predict(self, img_list):
     """Predict class indices for image paths or an image array.

     The model is switched to inference mode first. Element ``[1]`` of the
     model's output is what gets passed through softmax (presumably the
     model returns several outputs - confirm against the model class).

     Args:
         img_list (str, list, tuple, ndarray): A single image path, a
             sequence of image paths, or an already prepared image array.
             Anything that is not a str/list/tuple is handed to the model
             unchanged (no loading, no preprocessing).

     Returns:
         For a single path, the argmax of the softmax output for that
         image. Otherwise an ndarray with the argmax along axis 1.
     """
     self.set_models(inference=True)

     if isinstance(img_list, str):
         # Single path: load one image and add a leading batch axis.
         img_array = self.preprocess(load_img(img_list, self.imsize)[None])
         return np.argmax(rm.softmax(self(img_array)[1]).as_ndarray(), axis=1)[0]

     if isinstance(img_list, (tuple, list)):
         # Tuples are accepted here as well; the previous outer type check
         # only let lists through, so tuples of paths went to the model raw.
         img_array = np.vstack([load_img(path, self.imsize)[None] for path in img_list])
         img_array = self.preprocess(img_array)
     else:
         img_array = img_list
     return np.argmax(rm.softmax(self(img_array)[1]).as_ndarray(), axis=1)
Esempio n. 3
0
def test_gpu_node_softmax(a):
    """Compare ``sum(softmax(x))`` and its gradient with CUDA on and off.

    The same ``Variable`` built from ``a`` is evaluated once after
    ``set_cuda_active(True)`` and once after ``set_cuda_active(False)``;
    both the outputs and the gradients with respect to the variable are
    compared with ``close``.
    """
    # Pass 1: CUDA enabled.
    set_cuda_active(True)

    var = Variable(a)

    gpu_out = rm.sum(rm.softmax(var))
    gpu_grads = gpu_out.grad()
    gpu_grad_var = gpu_grads.get(var)
    gpu_out.to_cpu()

    # Pass 2: CUDA disabled, same variable.
    set_cuda_active(False)
    cpu_out = rm.sum(rm.softmax(var))
    cpu_grads = cpu_out.grad()
    cpu_grad_var = cpu_grads.get(var)

    close(gpu_out, cpu_out)
    close(cpu_grad_var, gpu_grad_var)
Esempio n. 4
0
 def predict(self, src_seq, beam_width=10):
     """Decode ``src_seq`` with beam search and return the best sentence.

     Args:
         src_seq: Sequence of source words. It is fed to the encoder in
             reverse order; words missing from ``self.src_w2i`` are mapped
             to the ``'<unk>'`` entry.
         beam_width (int): Number of hypotheses kept after every step.

     Returns:
         list: Words of the highest scoring hypothesis, starting with
         ``'<bos>'``.
     """
     reversed_words = src_seq[::-1]
     # input word to index
     src_ids = np.array([
         self.src_w2i.get(token, self.src_w2i['<unk>'])
         for token in reversed_words
     ])
     src_ids = src_ids.reshape(len(src_ids), 1)
     embedded = self.l1(src_ids)  # index to vector (embedding)

     # encode: only the value returned for the last word is kept
     for vec in embedded:
         h = self.encode(vec.reshape(1, -1))

     # decode
     step = 1
     limit = 100
     # A hypothesis is (accumulated log probability, words, decoder snapshot).
     beam = [(0, ['<bos>'], {'z': h, 'state': self.l2._state})]
     all_finished = False
     while not all_finished and step <= limit + 1:  # limit + 1 for <'eos'>
         candidates = []
         all_finished = True
         for score, words, hidden in beam:
             # Restore the decoder snapshot belonging to this hypothesis.
             self.l4._z = hidden['z']
             self.l4._state = hidden['state']
             last_word = words[-1]

             if last_word == '<eos>':
                 # Finished hypotheses are carried over unchanged.
                 candidates.append((score, words, hidden))
                 continue

             all_finished = False
             target_id = np.array([self.tar_w2i[last_word]]).reshape(1, -1)
             dec_in = self.l3(target_id).reshape(1, -1)
             log_probs = rm.log(rm.softmax(self.decode(dec_in))).as_ndarray()[0]
             # Snapshot taken after the decode step; shared by all expansions.
             next_hidden = {'z': self.l4._z, 'state': self.l4._state}
             for idx in range(self.tar_vocab_size):
                 extended = words + [self.tar_i2w[idx]]
                 candidates.append(
                     (score + log_probs[idx], extended, next_hidden))

         # Keep the `beam_width` best hypotheses (highest score first).
         candidates.sort(key=lambda item: item[0], reverse=True)
         beam = candidates[:beam_width]
         step += 1
     self.truncate()
     return beam[0][1]
Esempio n. 5
0
    def get_bbox(self, z, score_threshold=0.6, nms_threshold=0.45):
        """Turn raw network output into per-image lists of detected boxes.

        Args:
            z (ndarray, Node): Network output. It is split along axis 2 into
                the first 4 values (box regression) and the remaining values
                (class scores), i.e. it is indexed as
                (image, box, 4 + scores).
            score_threshold (float): Class scores below this value are
                discarded.
            nms_threshold (float): Threshold forwarded to ``nms``.

        Returns:
            Whatever ``nms`` returns for the collected candidates. The
            candidates are one list per image; each entry is a dict with the
            keys ``"box"``, ``"name"``, ``"class"`` and ``"score"``.
        """
        N = len(z)
        class_num = len(self.class_map)
        top_k = 100  # Maximum number of boxes kept per class.
        if hasattr(z, 'as_ndarray'):
            z = z.as_ndarray()

        loc, conf = np.split(z, [4], axis=2)
        loc = np.concatenate([self.decode_box(loc[n])[None] for n in range(N)],
                             axis=0)
        loc = np.clip(loc, 0, 1)
        # Replace the last two coordinates by their difference to the first
        # two, then shift the first two by half of that difference.
        # Presumably this converts corner boxes (x1, y1, x2, y2) into
        # (center x, center y, width, height) - depends on decode_box.
        loc[:, :, 2:] = loc[:, :, 2:] - loc[:, :, :2]
        loc[:, :, :2] += loc[:, :, 2:] / 2.

        # The class axis is moved to position 1 for softmax and moved back.
        conf = rm.softmax(conf.transpose(0, 2,
                                         1)).as_ndarray().transpose(0, 2, 1)

        result_bbox = []
        # Drop the first score channel (presumably the background class).
        conf = conf[:, :, 1:]
        conf[conf < score_threshold] = 0

        # Transposes are required to manipulate tensors in `class major` order.
        # (N, box, class) => (N, class, box)
        sorted_conf_index = np.argsort(-conf,
                                       axis=1)  # Arg sort in descending order.
        # argsort of an argsort yields each box's rank per class; keep the
        # boxes ranked within the first `top_k`.
        keep_index = (np.argsort(sorted_conf_index, axis=1) < top_k).transpose(
            0, 2, 1)

        conf = conf.transpose(0, 2, 1)
        conf = conf[keep_index].reshape(N, class_num, -1)

        # Gather, per class, the coordinates of the kept boxes. The builtin
        # `bool` is used because the `np.bool` alias was removed from NumPy.
        loc = np.concatenate([
            loc[(keep_index[:, c, :].reshape(N, -1, 1) *
                 np.ones_like(loc)).astype(bool)].reshape(N, 1, -1, 4)
            for c in range(class_num)
        ],
                             axis=1)

        for n in range(N):
            nth_result = []
            nth_loc = loc[n]
            for ndind in np.ndindex(*conf.shape[1:]):
                # ndind is (class index, kept box index).
                if conf[n, ndind[0], ndind[1]] < score_threshold:
                    continue
                nth_result.append({
                    "box":
                    nth_loc[ndind[0], ndind[1]].tolist(),
                    "name":
                    self.class_map[ndind[0]].decode('utf-8'),
                    "class":
                    int(ndind[0]),
                    "score":
                    float(conf[n, ndind[0], ndind[1]])
                })
            result_bbox.append(nth_result)
        ret = nms(result_bbox, nms_threshold)
        return ret
Esempio n. 6
0
    def forward(self, x):
        """Run forward propagation.

        The method is also reachable through ``__call__``; see the example
        below.

        Args:
            x (ndarray, Node): Input image as a tensor.

        Returns:
            (Node): Raw output of YOLO v2.
            It can be converted to bounding boxes with the method ``get_bbox``.

        Example:
            >>> import numpy as np
            >>> from renom_img.api.detection.yolo_v2 import Yolov2
            >>>
            >>> x = np.random.rand(1, 3, 224, 224)
            >>> class_map = ["dog", "cat"]
            >>> model = Yolov2(class_map)
            >>> y = model.forward(x) # Forward propagation.
            >>> y = model(x)  # Same as above result.
            >>>
            >>> bbox = model.get_bbox(y) # The output can be reformed using get_bbox method.

        """

        assert len(self.class_map) > 0, \
            "Class map is empty. Please set the attribute class_map when instantiate model class. " +\
            "Or, please load already trained model using the method 'load()'."
        assert self.num_anchor > 0, \
            "Anchor list is empty. Please calculate anchor list using create_anchor function, before instantiate model class.  " +\
            "Or, please load already trained model using the method 'load()'."

        # The wrapped network is only updated when the whole network is
        # trained; otherwise it always runs in inference mode.
        self._freezed_network.set_auto_update(self.train_whole_network)
        use_inference = (not self.train_whole_network
                         or getattr(self, 'inference', False))
        self._freezed_network.set_models(inference=use_inference)

        deep, shallow = self._freezed_network(x)
        shallow = self._conv21(shallow)
        deep = self._conv1(deep)

        # Concatenate `deep` with the four stride-2 sub-samplings of
        # `shallow` (offsets (0, 0), (0, 1), (1, 0), (1, 1)).
        sub_grids = [
            shallow[:, :, i::2, j::2] for i in range(2) for j in range(2)
        ]
        merged = self._conv2(rm.concat(deep, rm.concat(sub_grids)))

        out = self._last(merged)
        # Create yolo format.
        N, _, H, W = merged.shape

        def swap_12(node):
            # Exchange axes 1 and 2 of a 4-axis tensor.
            return node.transpose(0, 2, 1, 3)

        per_anchor = out.reshape(N, self.num_anchor, -1, W * H)
        conf = swap_12(rm.sigmoid(per_anchor[:, :, 0:1]))
        px = swap_12(rm.sigmoid(per_anchor[:, :, 1:2]))
        py = swap_12(rm.sigmoid(per_anchor[:, :, 2:3]))
        pw = swap_12(rm.exp(per_anchor[:, :, 3:4]))
        ph = swap_12(rm.exp(per_anchor[:, :, 4:5]))
        # For the class scores the axes are swapped before the softmax.
        cl = rm.softmax(swap_12(per_anchor[:, :, 5:]))
        stacked = rm.concat(conf, px, py, pw, ph, cl)
        return swap_12(stacked).reshape(N, -1, H, W)
Esempio n. 7
0
    def predict(self, img_list):
        """Perform prediction.

        The argument can be an image array, a sequence of image paths or a
        single image path. The model is switched to inference mode first.

        Args:
            img_list (ndarray, list, tuple, str): Image array, image path
                sequence or image path. Anything that is not a
                str/list/tuple is handed to the model unchanged (no
                loading, no preprocessing).

        Return:
            For a single path, the predicted class of that image. For
            ``batch_size`` or more paths, a list with the class of each
            image, computed batch by batch. Otherwise an ndarray with the
            argmax along axis 1 of the softmax output.

        """
        batch_size = 32
        self.set_models(inference=True)

        if isinstance(img_list, str):
            # Single path: load one image and add a leading batch axis.
            img_array = self.preprocess(load_img(img_list, self.imsize)[None])
            return np.argmax(rm.softmax(self(img_array)).as_ndarray(),
                             axis=1)[0]

        if isinstance(img_list, (tuple, list)):
            # Tuples are accepted here as well; the previous outer type check
            # only let lists through, so tuples of paths went to the model raw.
            paths = list(img_list)
            if len(paths) >= batch_size:
                test_dist = ImageDistributor(paths)
                n_batches = int(np.ceil(len(test_dist) / batch_size))
                results = []
                # The context manager closes the progress bar even on errors.
                with tqdm(total=n_batches) as bar:
                    for x_img_list, _ in test_dist.batch(batch_size,
                                                         shuffle=False):
                        img_array = np.vstack([
                            load_img(path, self.imsize)[None]
                            for path in x_img_list
                        ])
                        img_array = self.preprocess(img_array)
                        results.extend(
                            np.argmax(rm.softmax(self(img_array)).as_ndarray(),
                                      axis=1))
                        bar.update(1)
                return results
            img_array = np.vstack(
                [load_img(path, self.imsize)[None] for path in paths])
            img_array = self.preprocess(img_array)
        else:
            img_array = img_list
        return np.argmax(rm.softmax(self(img_array)).as_ndarray(), axis=1)
Esempio n. 8
0
    def predict(self, img_list):
        """Predict the argmax class map for image paths or an image array.

        The model is switched to inference mode, the input is loaded and
        preprocessed when it is given as path(s), and the argmax along
        axis 1 of the softmax output is returned.

        Args:
            img_list (ndarray, list, tuple, str): Image array, image path
                sequence or image path. Anything that is not a
                str/list/tuple is handed to the model unchanged (no
                loading, no preprocessing).

        Returns:
            (Numpy.array or list): If only an image or a path is given, an array whose shape is **(width, height)** is returned.
            If multiple images or paths are given, then a list in which there are arrays whose shape is **(width, height)** is returned.
        """

        batch_size = 32
        self.set_models(inference=True)

        if isinstance(img_list, str):
            # Single path: load one image and add a leading batch axis.
            img_array = self.preprocess(load_img(img_list, self.imsize)[None])
            return np.argmax(rm.softmax(self(img_array)).as_ndarray(),
                             axis=1)[0]

        if isinstance(img_list, (tuple, list)):
            # Tuples are accepted here as well; the previous outer type check
            # only let lists through, so tuples of paths went to the model raw.
            paths = list(img_list)
            if len(paths) >= batch_size:
                test_dist = ImageDistributor(paths)
                n_batches = int(np.ceil(len(test_dist) / batch_size))
                results = []
                # The context manager closes the progress bar even on errors.
                with tqdm(total=n_batches) as bar:
                    for x_img_list, _ in test_dist.batch(batch_size,
                                                         shuffle=False):
                        img_array = np.vstack([
                            load_img(path, self.imsize)[None]
                            for path in x_img_list
                        ])
                        img_array = self.preprocess(img_array)
                        results.extend(
                            np.argmax(rm.softmax(self(img_array)).as_ndarray(),
                                      axis=1))
                        bar.update(1)
                return results
            img_array = np.vstack(
                [load_img(path, self.imsize)[None] for path in paths])
            img_array = self.preprocess(img_array)
        else:
            img_array = img_list
        return np.argmax(rm.softmax(self(img_array)).as_ndarray(), axis=1)
Esempio n. 9
0
def yolo_predict(model, input_x):
    """Run ``model`` on ``input_x`` and decode the output into box terms.

    The output is reshaped to
    ``(batch, model.bbox, model.classes + 5, grid_h, grid_w)``; along axis 2
    the entries are x, y, w, h, confidence and then the class scores.

    Args:
        model: Callable model exposing ``bbox``, ``classes`` and ``anchors``.
        input_x: Input passed to ``model``.

    Returns:
        tuple: ``(box_x, box_y, box_w, box_h, conf, prob)`` where the box
        values are divided by the grid size, ``conf`` went through a
        sigmoid and ``prob`` through a softmax.
    """
    output = model(input_x)
    batch_size, _, grid_h, grid_w = output.shape
    grid_shape = (batch_size, model.bbox, model.classes + 5, grid_h, grid_w)
    output_reshape = np.reshape(output, grid_shape)

    x = output_reshape[:, :, 0:1, :, :]
    y = output_reshape[:, :, 1:2, :, :]
    w = output_reshape[:, :, 2:3, :, :]
    h = output_reshape[:, :, 3:4, :, :]
    conf = output_reshape[:, :, 4:5, :, :]
    prob = output_reshape[:, :, 5:, :, :]

    x = rm.sigmoid(x)  # activation for x
    y = rm.sigmoid(y)  # activation for y
    conf = rm.sigmoid(conf)  # activation for the confidence
    # activation for the class probabilities: the class axis is moved to
    # position 1 for the softmax and moved back afterwards
    class_first = (0, 2, 1, 3, 4)
    prob = np.transpose(prob, class_first)
    prob = rm.softmax(prob)
    prob = np.transpose(prob, class_first)

    # Convert x, y, w, h to absolute coordinates.
    column_index = np.arange(grid_w, dtype=np.float32)
    row_index = np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1)
    x_shift = np.broadcast_to(column_index, x.shape)
    y_shift = np.broadcast_to(row_index, y.shape)

    anchor_column_shape = (model.bbox, 1, 1, 1)
    anchor_w = np.array(model.anchors, dtype=np.float32)[:, 0]
    anchor_h = np.array(model.anchors, dtype=np.float32)[:, 1]
    w_anchor = np.broadcast_to(np.reshape(anchor_w, anchor_column_shape), w.shape)
    h_anchor = np.broadcast_to(np.reshape(anchor_h, anchor_column_shape), h.shape)

    box_x = (x + x_shift) / grid_w
    box_y = (y + y_shift) / grid_h
    box_w = np.exp(w) * w_anchor / grid_w
    box_h = np.exp(h) * h_anchor / grid_h

    return box_x, box_y, box_w, box_h, conf, prob
Esempio n. 10
0
 def func(node, x):
     """Return ``rm.cross_entropy`` of ``softmax(node)`` against ``x``."""
     probabilities = rm.softmax(node)
     return rm.cross_entropy(probabilities, x)
Esempio n. 11
0
 def predict(self, x):
     """Predict a label for every row of ``x``.

     The argmax along axis 1 of the softmax of ``self.network(x)`` is
     looked up in ``self.lb.classes_``.

     Returns:
         ndarray: The looked-up entries of ``self.lb.classes_``.
     """
     scores = rm.softmax(self.network(x)).as_ndarray()
     class_indices = np.argmax(scores, axis=1)
     labels = []
     for index in class_indices:
         labels.append(self.lb.classes_[index])
     return np.array(labels)
Esempio n. 12
0
    def run(self):
        """Train ``self.model`` and validate it after every epoch.

        Each of the ``self.total_epoch`` epochs does one pass over
        ``self.train_dist`` (forward, loss, regularisation, gradient update)
        followed by one pass over ``self.valid_dist`` in inference mode.
        Task specific metrics are then computed and, when they are at least
        as good as the stored best result, the model is saved through
        ``save_best_model`` and ``self.best_epoch_valid_result`` is replaced.
        The last model is saved at the end of every epoch.

        ``self.stop_event`` is polled at many points; when it is set the
        method flags ``self.updated`` and returns immediately.
        """

        def to_float(loss_node):
            # The loss may come back as a 1-element array or as a 0-d value;
            # indexing the latter fails, in which case it is used directly.
            value = loss_node.as_ndarray()
            try:
                value = value[0]
            except (IndexError, TypeError):
                pass
            return float(value)

        def is_new_best(metric, value):
            # True when nothing is stored yet or the stored metric is not
            # better than `value` (ties count as a new best).
            best = self.best_epoch_valid_result
            return not best or best[metric] <= value

        def record_best(result):
            self.best_valid_changed = True
            self.save_best_model()
            self.best_epoch_valid_result = result

        model = self.model
        self.state = State.STARTED
        self.running_state = RunningState.TRAINING
        if self.task_id == Task.DETECTION.value:
            # Detection targets are fetched once for the whole validation
            # set instead of being collected batch by batch.
            valid_target = self.valid_dist.get_resized_annotation_list(
                self.imsize)

        if self.stop_event.is_set():
            # Watch stop event
            self.updated = True
            return

        for e in range(self.total_epoch):
            release_mem_pool()
            self.nth_epoch = e
            if self.stop_event.is_set():
                # Watch stop event
                self.updated = True
                return

            model.set_models(inference=False)
            temp_train_batch_loss_list = []

            self.running_state = RunningState.TRAINING
            self.sync_state()

            for b, (train_x, train_y) in enumerate(
                    self.train_dist.batch(self.batch_size), 1):
                # For Yolov2 the memory pool is released every 10 batches
                # (but not before the first one).
                if isinstance(self.model,
                              Yolov2) and (b - 1) % 10 == 0 and (b - 1):
                    release_mem_pool()

                self.nth_batch = b
                if self.stop_event.is_set():
                    # Watch stop event
                    self.updated = True
                    return

                if len(train_x) > 0:
                    with model.train():
                        loss = model.loss(model(train_x), train_y)
                        reg_loss = loss + model.regularize()

                    loss = to_float(loss)

                    temp_train_batch_loss_list.append(loss)
                    self.last_batch_loss = loss
                    self.sync_batch_result()

                    if self.stop_event.is_set():
                        # Watch stop event
                        self.updated = True
                        return

                    reg_loss.grad().update(
                        model.get_optimizer(
                            current_loss=loss,
                            current_epoch=e,
                            total_epoch=self.total_epoch,
                            current_batch=b - 1,
                            total_batch=self.total_batch,
                            avg_valid_loss_list=self.valid_loss_list))

                # Thread value changed.
                self.updated = True

            self.train_loss_list.append(np.mean(temp_train_batch_loss_list))
            self.sync_train_loss()

            self.updated = True

            release_mem_pool()
            self.running_state = RunningState.VALIDATING
            self.sync_state()

            if self.task_id != Task.DETECTION.value:
                valid_target = []
            valid_prediction = []
            temp_valid_batch_loss_list = []
            model.set_models(inference=True)
            for b, (valid_x, valid_y) in enumerate(
                    self.valid_dist.batch(self.batch_size, shuffle=False)):

                if self.stop_event.is_set():
                    # Watch stop event
                    self.updated = True
                    return

                valid_prediction_in_batch = model(valid_x)
                loss = model.loss(valid_prediction_in_batch, valid_y)
                if self.task_id == Task.CLASSIFICATION.value:
                    # Classification stores probabilities, other tasks the
                    # raw model output.
                    valid_prediction.append(
                        rm.softmax(valid_prediction_in_batch).as_ndarray())
                else:
                    valid_prediction.append(
                        valid_prediction_in_batch.as_ndarray())

                if self.task_id != Task.DETECTION.value:
                    valid_target.append(valid_y)

                temp_valid_batch_loss_list.append(to_float(loss))

            self.valid_loss_list.append(np.mean(temp_valid_batch_loss_list))
            self.sync_valid_loss()

            if self.stop_event.is_set():
                # Watch stop event
                self.updated = True
                return

            valid_prediction = np.concatenate(valid_prediction, axis=0)
            if self.task_id != Task.DETECTION.value:
                valid_target = np.concatenate(valid_target, axis=0)
            n_valid = min(len(valid_prediction), len(valid_target))

            # Depends on each task.
            loss = self.valid_loss_list[-1]
            if self.task_id == Task.CLASSIFICATION.value:
                pred = np.argmax(valid_prediction, axis=1)
                targ = np.argmax(valid_target, axis=1)
                _, pr, _, rc, _, f1 = precision_recall_f1_score(pred, targ)
                prediction = [{
                    "score": [float(vc) for vc in v],
                    "class": float(p)
                } for v, p in zip(valid_prediction, pred)]
                if is_new_best("f1", f1):
                    record_best({
                        "nth_epoch": e,
                        "prediction": prediction,
                        "recall": float(rc),
                        "precision": float(pr),
                        "f1": float(f1),
                        "loss": float(loss)
                    })
                self.sync_best_valid_result()

            elif self.task_id == Task.DETECTION.value:
                prediction_box = model.get_bbox(valid_prediction[:n_valid])
                prec, rec, _, iou = get_prec_rec_iou(prediction_box,
                                                     valid_target[:n_valid])
                _, mAP = get_ap_and_map(prec, rec)
                if is_new_best("mAP", mAP):
                    record_best({
                        "nth_epoch": e,
                        "prediction": prediction_box,
                        "mAP": float(mAP),
                        "IOU": float(iou),
                        "loss": float(loss)
                    })
                self.sync_best_valid_result()

            elif self.task_id == Task.SEGMENTATION.value:
                pred = np.argmax(valid_prediction, axis=1)
                targ = np.argmax(valid_target, axis=1)
                _, pr, _, rc, _, f1, _, _, _, _ = \
                    get_segmentation_metrics(pred, targ, n_class=len(self.class_map))

                prediction = []
                for p, t in zip(pred, targ):
                    # Per-image metrics; only the per-class precision and
                    # recall dicts are used.
                    lep, lemp, ler, lemr, _, _, _, _, _, _ = get_segmentation_metrics(
                        p[None], t[None], n_class=len(self.class_map))
                    prediction.append({
                        # Builtin `int`: the `np.int` alias was removed
                        # from NumPy.
                        "class": p.astype(int).tolist(),
                        "recall": {k: float(v)
                                   for k, v in ler.items()},
                        "precision": {k: float(v)
                                      for k, v in lep.items()},
                    })

                if is_new_best("f1", f1):
                    record_best({
                        "nth_epoch": e,
                        "prediction": prediction,
                        "recall": float(rc),
                        "precision": float(pr),
                        "f1": float(f1),
                        "loss": float(loss)
                    })
                self.sync_best_valid_result()

            # Thread value changed.
            self.save_last_model()
            self.updated = True
Esempio n. 13
0
 def predict(self, x):
     """Return the softmax of ``self.predictor(x)``."""
     return rm.softmax(self.predictor(x))
Esempio n. 14
0
    def _oper_cpu(cls, output, t, bbox, classes, init_anchors):
        batch_size, _, grid_h, grid_w = output.shape
        output_reshape = rm.reshape(
            output, (batch_size, bbox, classes + 5, grid_h, grid_w))
        x, y, w, h, conf, prob = output_reshape[:, :, 0:
                                                1, :, :], output_reshape[:, :,
                                                                         1:
                                                                         2, :, :], output_reshape[:, :,
                                                                                                  2:
                                                                                                  3, :, :], output_reshape[:, :,
                                                                                                                           3:
                                                                                                                           4, :, :], output_reshape[:, :,
                                                                                                                                                    4:
                                                                                                                                                    5, :, :], output_reshape[:, :,
                                                                                                                                                                             5:, :, :]
        x = rm.sigmoid(x)
        y = rm.sigmoid(y)
        conf = rm.sigmoid(conf)
        prob = rm.transpose(prob,
                            (0, 2, 1, 3, 4)).reshape(batch_size, classes, -1)
        prob = rm.softmax(prob)
        # prob_exp = np.exp(prob)
        # prob = prob_exp / np.sum(prob_exp, axis=1, keepdims=True)
        prob = rm.reshape(prob, (batch_size, classes, bbox, grid_h, grid_w))
        deltas = np.zeros(output_reshape.shape, dtype=np.float32)

        #x.to_cpu()
        #y.to_cpu()
        #conf.to_cpu()
        #prob.to_cpu()
        #anchor
        if init_anchors is None:
            anchors = [[5.375, 5.03125], [5.40625, 4.6875], [2.96875, 2.53125],
                       [2.59375, 2.78125], [1.9375, 3.25]]
        else:
            anchors = init_anchors

        thresh = 0.7
        # 教師データ
        tw = np.ones(w.shape, dtype=np.float32)
        th = np.ones(h.shape, dtype=np.float32)
        tx = np.tile(0.5, x.shape).astype(np.float32)
        ty = np.tile(0.5, y.shape).astype(np.float32)
        box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)

        tconf = np.zeros(conf.shape, dtype=np.float32)
        conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

        tprob = prob.as_ndarray()
        #print("output")
        #print(output_reshape[1,1, :,1,1])
        x_shift = np.broadcast_to(np.arange(grid_w, dtype=np.float32),
                                  x.shape[1:])
        y_shift = np.broadcast_to(
            np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
            y.shape[1:])
        w_anchor = np.broadcast_to(
            np.reshape(
                np.array(anchors, dtype=np.float32)[:, 0], (bbox, 1, 1, 1)),
            w.shape[1:])
        h_anchor = np.broadcast_to(
            np.reshape(
                np.array(anchors, dtype=np.float32)[:, 1], (bbox, 1, 1, 1)),
            h.shape[1:])
        #x_shift.to_gpu(), y_shift.to_gpu(), w_anchor.to_gpu(), h_anchor.to_gpu()

        best_ious = []
        for batch in range(batch_size):
            truth_bbox = len(t[batch])
            box_x = (x[batch] + x_shift) / grid_w
            box_y = (y[batch] + y_shift) / grid_h
            box_w = np.exp(w[batch]) * w_anchor / grid_w
            box_h = np.exp(h[batch]) * h_anchor / grid_h
            ious = []
            for truth_index in range(truth_bbox):
                truth_box_x = np.broadcast_to(
                    np.array(t[batch][truth_index]["x"], dtype=np.float32),
                    box_x.shape)
                truth_box_y = np.broadcast_to(
                    np.array(t[batch][truth_index]["y"], dtype=np.float32),
                    box_y.shape)
                truth_box_w = np.broadcast_to(
                    np.array(t[batch][truth_index]["w"], dtype=np.float32),
                    box_w.shape)
                truth_box_h = np.broadcast_to(
                    np.array(t[batch][truth_index]["h"], dtype=np.float32),
                    box_h.shape)
                #truth_box_x.to_gpu(), truth_box_y.to_gpu(), truth_box_w.to_gpu(), truth_box_h.to_gpu()
                ious.append(
                    multi_box_iou(
                        Box(box_x, box_y, box_w, box_h),
                        Box(truth_box_x, truth_box_y, truth_box_w,
                            truth_box_h)))
            ious = np.array(ious)
            best_ious.append(np.max(ious, axis=0))
        best_ious = np.array(best_ious)
        tconf[best_ious > thresh] = conf[best_ious > thresh]
        conf_learning_scale[best_ious > thresh] = 0

        abs_anchors = anchors / np.array([grid_w, grid_h])
        for batch in range(batch_size):
            for truth_box in t[batch]:
                truth_h = int(float(truth_box["x"]) * grid_w)
                truth_w = int(float(truth_box["y"]) * grid_h)
                truth_n = 0
                best_iou = 0.0
                for anchor_index, abs_anchor in enumerate(abs_anchors):
                    iou = box_iou(
                        Box(0, 0, float(truth_box["w"]),
                            float(truth_box["h"])),
                        Box(0, 0, abs_anchor[0], abs_anchor[1]))
                    if best_iou < iou:
                        best_iou = iou
                        truth_n = anchor_index

                box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
                tx[batch, truth_n, :, truth_h,
                   truth_w] = float(truth_box["x"]) * grid_w - truth_w
                ty[batch, truth_n, :, truth_h,
                   truth_w] = float(truth_box["y"]) * grid_h - truth_h
                tw[batch, truth_n, :, truth_h,
                   truth_w] = float(truth_box["w"]) / abs_anchors[truth_n][0]
                th[batch, truth_n, :, truth_h,
                   truth_w] = float(truth_box["h"]) / abs_anchors[truth_n][1]
                tprob[batch, :, truth_n, truth_h, truth_w] = 0
                tprob[batch,
                      int(truth_box["label"]), truth_n, truth_h, truth_w] = 1

                full_truth_box = Box(float(truth_box["x"]),
                                     float(truth_box["y"]),
                                     float(truth_box["w"]),
                                     float(truth_box["h"]))
                predicted_box = Box(
                    (x[batch, truth_n, 0, truth_h, truth_w] + truth_w) /
                    grid_w,
                    (y[batch, truth_n, 0, truth_h, truth_w] + truth_h) /
                    grid_h,
                    np.exp(w[batch, truth_n, 0, truth_h, truth_w]) *
                    abs_anchors[truth_n][0],
                    np.exp(h[batch, truth_n, 0, truth_h, truth_w]) *
                    abs_anchors[truth_n][1])
                predicted_iou = box_iou(full_truth_box, predicted_box)
                tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
                conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 5.0

        #box_learning_scale *= 100
        #loss
        #print(np.where(box_learning_scale==1))
        x_loss = np.sum((tx - x)**2 * box_learning_scale) / 2
        #print(deltas[:,:,0:1,:,:])
        deltas[:, :, 0:1, :, :] = ((x - tx) * box_learning_scale *
                                   (1 - x) * x).as_ndarray() * 10
        #print(deltas.dtype())
        #print((x - tx).dtype())
        #print(deltas[:,:,0:1,:,:] - ((x - tx) *box_learning_scale * (1 - x) * x))
        #print(x-tx)
        #print(deltas[:,:,0,:,:])
        y_loss = np.sum((ty - y)**2 * box_learning_scale) / 2
        deltas[:, :, 1:2, :, :] = ((y - ty) * box_learning_scale *
                                   (1 - y) * y).as_ndarray() * 10
        w_loss = np.sum((tw - np.exp(w))**2 * box_learning_scale) / 2
        deltas[:, :,
               2:3, :, :] = ((np.exp(w) - tw) * box_learning_scale * np.exp(w))
        h_loss = np.sum((th - np.exp(h))**2 * box_learning_scale) / 2
        deltas[:, :,
               3:4, :, :] = ((np.exp(h) - th) * box_learning_scale * np.exp(h))
        c_loss = np.sum((tconf - conf)**2 * conf_learning_scale) / 2
        deltas[:, :, 4:5, :, :] = ((conf - tconf) * conf_learning_scale *
                                   (1 - conf) * conf).as_ndarray()
        #print(deltas[:,:,4:5,:,:])
        #print(deltas[:,:,4:5,:,:] - (conf - tconf) * conf_learning_scale * (1 - conf) * conf)
        p_loss = np.sum((tprob - prob)**2) / 2
        deltas[:, :, 5:, :, :] = ((
            ((prob - tprob) *
             (1 - prob) * prob)).as_ndarray()).transpose(0, 2, 1, 3, 4) * 10
        #print(deltas[:,:,5:,:,:] - ((prob - tprob) * (1 - prob) * prob).transpose(0, 2, 1, 3, 4))
        if np.isnan(p_loss):
            p_loss = 0
        print(
            "x_loss: %f  y_loss: %f  w_loss: %f  h_loss: %f  c_loss: %f   p_loss: %f"
            % (x_loss, y_loss, w_loss, h_loss, c_loss, p_loss))

        loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
        #loss = p_loss
        ret = cls._create_node(loss)
        ret.attrs._output = output
        ret.attrs._deltas = deltas.reshape(batch_size, bbox * (classes + 5),
                                           grid_h, grid_w)
        # ret.attrs._cells = cells
        # ret.attrs._bbox = bbox
        # ret.attrs._classes = classes
        return ret
Esempio n. 15
0
    def get_bbox(self,
                 z,
                 score_threshold=0.3,
                 nms_threshold=0.4,
                 keep_top_k=200):
        """Decode raw network output into per-image lists of detections.

        For every image, the class scores are soft-maxed, boxes are decoded
        with ``self.decode_boxes``, low-scoring candidates are dropped,
        ``self.nms`` is applied per class, and the surviving detections are
        sorted by descending score and truncated to ``keep_top_k``.

        Example:
            >>> z = model(x)
            >>> model.get_bbox(z)
            [[{'box': [0.21, 0.44, 0.11, 0.32], 'score':0.823, 'class':1}],
             [{'box': [0.87, 0.38, 0.84, 0.22], 'score':0.423, 'class':0}]]

        Args:
            z (ndarray): Output array of the neural network. It is indexed as
                (image, channel, prior); along the channel axis the first 4
                entries are the location offsets, the last 4 the variances,
                the 4 before those the prior boxes, and everything in between
                the class scores (channel 0 of which is the background).
                The array passed in is not modified.
            score_threshold (float): The threshold for confidence score.
                Predicted boxes which have a lower confidence score than the
                threshold are discarded. Defaults to 0.3
            nms_threshold (float): The threshold for non maximum suppression.
                Defaults to 0.4
            keep_top_k (int): Maximum number of detections kept per image
                after sorting by score. Defaults to 200

        Return:
            (list): List of predicted bbox, score and class of each image.
                The format of the return value is below. Box coordinates and
                size will be returned as ratio to the original image size.
                Therefore the range of 'box' is [0 ~ 1]. 'class' is the class
                index with the background class excluded, and 'name' is the
                matching entry of ``self.class_map``.

            [
                [ # Prediction of first image.
                    {'box': [x, y, w, h], 'score':(float), 'class':(int), 'name':...},
                    {'box': [x, y, w, h], 'score':(float), 'class':(int), 'name':...},
                    ...
                ],
                [ # Prediction of second image.
                    {'box': [x, y, w, h], 'score':(float), 'class':(int), 'name':...},
                    {'box': [x, y, w, h], 'score':(float), 'class':(int), 'name':...},
                    ...
                ],
                ...
            ]

        Note:
            Box coordinate and size will be returned as ratio to the original image size.
            Therefore the range of 'box' is [0 ~ 1].


        """
        # Objects exposing ``as_ndarray`` (e.g. what ``rm.softmax`` returns
        # below) are converted to a plain array first.
        if hasattr(z, 'as_ndarray'):
            z = z.as_ndarray()
        # Work on a private copy: the softmax below is written back in place,
        # and doing that on the caller's array would make a second call on
        # the same ``z`` apply softmax twice.
        z = np.array(z)
        z[:, 4:-8, :] = rm.softmax(z[:, 4:-8, :]).as_ndarray()
        z = z.transpose((0, 2, 1))
        mbox_loc = z[:, :, :4]
        variances = z[:, :, -4:]
        mbox_priorbox = z[:, :, -8:-4]
        mbox_conf = z[:, :, 4:-8]

        results = []
        for loc, priorbox, variance, conf in zip(mbox_loc, mbox_priorbox,
                                                 variances, mbox_conf):
            detections = []
            decoded_bbox = self.decode_boxes(loc, priorbox, variance)
            # Class 0 is the background, so start from 1.
            for c in range(1, self.num_class):
                c_confs = conf[:, c]
                keep = c_confs > score_threshold
                if not keep.any():
                    continue
                boxes_to_process = decoded_bbox[keep]
                confs_to_process = c_confs[keep]
                idx = self.nms(boxes_to_process, confs_to_process,
                               nms_threshold)
                good_boxes = boxes_to_process[idx]
                # Keep the scores 1-D so each element is a scalar; calling
                # float() on a 1-element array is deprecated in NumPy >= 1.25.
                good_confs = confs_to_process[idx]
                for box, score in zip(good_boxes, good_confs):
                    detections.append({
                        "class": c - 1,
                        "score": float(score),
                        "box": box,
                        'name': self.class_map[c - 1]
                    })
            if detections:
                scores = np.array([obj['score'] for obj in detections])
                # Descending score order, truncated to the best keep_top_k.
                order = np.argsort(scores)[::-1][:keep_top_k]
                detections = [detections[k] for k in order]
            results.append(detections)
        return results