Example #1
0
 def forward(self, x):
     """Encode and decode ``x`` and return the reconstruction loss.

     Args:
         x: Input batch; it is also the target the decoded output is
             compared against.

     Returns:
         The result of ``rm.mse`` between the decoded output and ``x``.
     """
     hidden = rm.sigmoid(self._encodelayer(x))
     code = rm.sigmoid(self._encodedlayer(hidden))
     expanded = rm.sigmoid(self._decodelayer(code))
     # The last layer is used without an activation before the loss.
     reconstruction = self._decodedlayer(expanded)
     return rm.mse(reconstruction, x)
Example #2
0
    def forward(self, x):
        """Performs forward propagation.
        This function can be called using ``__call__`` method.
        See following example of method usage.

        Args:
            x (ndarray, Node): Input image as a tensor.

        Returns:
            (Node): Returns raw output of yolo v2.
            You can reform it to bounding box form using the method ``get_bbox``.

        Example:
            >>> import numpy as np
            >>> from renom_img.api.detection.yolo_v2 import Yolov2
            >>>
            >>> x = np.random.rand(1, 3, 224, 224)
            >>> class_map = ["dog", "cat"]
            >>> model = Yolov2(class_map)
            >>> y = model.forward(x) # Forward propagation.
            >>> y = model(x)  # Same as above result.
            >>>
            >>> bbox = model.get_bbox(y) # The output can be reformed using get_bbox method.

        """

        assert len(self.class_map) > 0, (
            "Class map is empty. Please set the attribute class_map when instantiate model class. "
            "Or, please load already trained model using the method 'load()'.")
        assert self.num_anchor > 0, (
            "Anchor list is empty. Please calculate anchor list using create_anchor function, before instantiate model class.  "
            "Or, please load already trained model using the method 'load()'.")

        # The base network is only updated when the whole network is trained;
        # otherwise it is switched to inference mode.
        backbone = self._freezed_network
        backbone.set_auto_update(self.train_whole_network)
        run_as_inference = (
            not self.train_whole_network or getattr(self, 'inference', False))
        backbone.set_models(inference=run_as_inference)

        h, f = backbone(x)
        f = self._conv21(f)
        h = self._conv1(h)

        # Split ``f`` into its four stride-2 sub-grids and join them with ``h``.
        sub_grids = [f[:, :, row::2, col::2] for row in (0, 1) for col in (0, 1)]
        h = self._conv2(rm.concat(h, rm.concat(sub_grids)))

        out = self._last(h)
        # Create yolo format.
        N, _, H, W = h.shape

        def swap_anchor_axis(node):
            # Exchange axes 1 and 2 of a 4-D node.
            return node.transpose(0, 2, 1, 3)

        reshaped = out.reshape(N, self.num_anchor, -1, W * H)
        conf = swap_anchor_axis(rm.sigmoid(reshaped[:, :, 0:1]))
        px = swap_anchor_axis(rm.sigmoid(reshaped[:, :, 1:2]))
        py = swap_anchor_axis(rm.sigmoid(reshaped[:, :, 2:3]))
        pw = swap_anchor_axis(rm.exp(reshaped[:, :, 3:4]))
        ph = swap_anchor_axis(rm.exp(reshaped[:, :, 4:5]))
        # Class scores are transposed first so softmax runs on the swapped layout.
        cl = rm.softmax(swap_anchor_axis(reshaped[:, :, 5:]))

        merged = rm.concat(conf, px, py, pw, ph, cl)
        return swap_anchor_axis(merged).reshape(N, -1, H, W)
Example #3
0
def test_gpu_node_sigmoid(a):
    """Compare ``sum(sigmoid(a))`` and its gradient between GPU and CPU runs.

    Args:
        a: Array wrapped in a ``Variable`` and fed to both runs.
    """
    variable = Variable(a) if set_cuda_active(True) is None else Variable(a)

    # GPU pass: forward value and gradient with respect to the variable.
    gpu_total = rm.sum(rm.sigmoid(variable))
    gpu_grad = gpu_total.grad().get(variable)
    gpu_total.to_cpu()

    # CPU pass over the very same variable.
    set_cuda_active(False)
    cpu_total = rm.sum(rm.sigmoid(variable))
    cpu_grad = cpu_total.grad().get(variable)

    close(gpu_total, cpu_total)
    close(cpu_grad, gpu_grad)
Example #4
0
 def forward(self, x):
     """Encode and decode ``x`` through the deep stack and return the loss.

     Args:
         x: Input batch; it is also the target the decoded output is
             compared against.

     Returns:
         The result of ``rm.mse`` between the decoded output and ``x``.
     """
     # Every layer in this tuple is followed by a sigmoid activation.
     sigmoid_stack = (
         self._encodelayer1,
         self._encodelayer2,
         self._encodelayer3,
         self._encodedlayer,
         self._decodelayer1,
         self._decodelayer2,
         self._decodelayer3,
     )
     activation = x
     for layer in sigmoid_stack:
         activation = rm.sigmoid(layer(activation))
     # The last layer is used without an activation before the loss.
     reconstruction = self._decodedlayer(activation)
     return rm.mse(reconstruction, x)
Example #5
0
def yolo_predict(model, input_x):
    """Run ``model`` on ``input_x`` and decode the output into box components.

    The output is reshaped to
    ``(batch, model.bbox, model.classes + 5, grid_h, grid_w)`` and split along
    axis 2 into x, y, w, h, confidence and class scores.

    Args:
        model: Callable model exposing ``bbox``, ``classes`` and ``anchors``.
        input_x: Input passed straight to ``model``.

    Returns:
        Tuple ``(box_x, box_y, box_w, box_h, conf, prob)``. The box values are
        divided by the grid size; ``prob`` is softmax-normalised.
    """
    output = model(input_x)
    batch_size, _, grid_h, grid_w = output.shape
    per_anchor = np.reshape(
        output, (batch_size, model.bbox, model.classes + 5, grid_h, grid_w))

    x = rm.sigmoid(per_anchor[:, :, 0:1, :, :])  # activation for x
    y = rm.sigmoid(per_anchor[:, :, 1:2, :, :])  # activation for y
    w = per_anchor[:, :, 2:3, :, :]
    h = per_anchor[:, :, 3:4, :, :]
    conf = rm.sigmoid(per_anchor[:, :, 4:5, :, :])  # activation for confidence

    # Move the class axis to position 1 for the softmax, then move it back.
    class_first = (0, 2, 1, 3, 4)
    prob = np.transpose(per_anchor[:, :, 5:, :, :], class_first)
    prob = rm.softmax(prob)  # activation for the class probabilities
    prob = np.transpose(prob, class_first)

    # Convert x, y, w, h to absolute coordinates.
    anchor_sizes = np.array(model.anchors, dtype=np.float32)
    per_anchor_shape = (model.bbox, 1, 1, 1)
    x_shift = np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape)
    y_shift = np.broadcast_to(
        np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1), y.shape)
    w_anchor = np.broadcast_to(
        np.reshape(anchor_sizes[:, 0], per_anchor_shape), w.shape)
    h_anchor = np.broadcast_to(
        np.reshape(anchor_sizes[:, 1], per_anchor_shape), h.shape)

    box_x = (x + x_shift) / grid_w
    box_y = (y + y_shift) / grid_h
    box_w = np.exp(w) * w_anchor / grid_w
    box_h = np.exp(h) * h_anchor / grid_h

    return box_x, box_y, box_w, box_h, conf, prob
 def forward(self, x):
     """Transform ``x``, run it through the hidden stack and apply a sigmoid.

     The transformed input is reshaped to
     ``(len(x), self.channels, self.dim, self.dim)`` before the hidden layers.

     With ``self.batch_normal`` set, the hidden layers are consumed in
     consecutive pairs: the first of each pair is applied directly and the
     second is followed by a ReLU (presumably a layer and its batch
     normalisation -- confirm against the constructor). Otherwise each
     hidden layer is followed by a ReLU.

     Args:
         x: Input batch; ``len(x)`` is used as the batch size.

     Returns:
         Sigmoid of ``self.output`` applied to the last hidden activation.
     """
     h = self.transform(x)
     h = rm.reshape(h, (len(x), self.channels, self.dim, self.dim))
     layers = self.hidden._layers
     if self.batch_normal:
         # Walk the list pair by pair. The previous loop ran len(layers)
         # times while indexing layers[2 * i], which overran the list.
         # A trailing unpaired layer, if any, is ignored.
         for first, second in zip(layers[0::2], layers[1::2]):
             h = first(h)
             h = rm.relu(second(h))
     else:
         for layer in layers:
             h = rm.relu(layer(h))
     h = rm.sigmoid(self.output(h))
     return h
Example #7
0
 def forward(self, x):
     """Run ``layer1 -> sigmoid -> layer2`` and print every intermediate value.

     Printed, in order: the input and its shape, the weights and bias of
     ``layer1``, the hidden pre-activation, the sigmoid activation, the
     weights and bias of ``layer2``, and the output with its shape.

     Args:
         x: Input batch.

     Returns:
         The output of ``self.layer2`` (no activation applied to it).
     """
     print("")
     print("input:\n{}".format(x))
     print("input shape:{}".format(x.shape))
     print("")
     # The layer is called before its parameters are printed, as in the
     # original ordering (parameters may only exist after the first call --
     # TODO confirm).
     t1 = self.layer1(x)
     print("input x hidden weight:\n{}".format(self.layer1.params.w))
     print("input x hidden bias:\n{}".format(self.layer1.params.b))
     print("")
     print("hidden:\n{}".format(t1))
     print("hidden shape:{}".format(t1.shape))
     t2 = rm.sigmoid(t1)
     print("")
     # The activation is a sigmoid; the label used to say "relu".
     print("sigmoid:\n{}".format(t2))
     print("sigmoid shape:{}".format(t2.shape))
     print("")
     t3 = self.layer2(t2)
     print("hidden x output weight:\n{}".format(self.layer2.params.w))
     print("hidden x output bias:\n{}".format(self.layer2.params.b))
     print("")
     print("output:\n{}".format(t3))
     print("output shape:{}".format(t3.shape))
     print("")
     return t3
 def forward(self, x):
     """Apply ``cnn1``, ``cnn2`` and ``output``, then a sigmoid.

     Two intermediate results are kept on the instance: ``self.lth`` holds
     the output of ``cnn1`` and ``self.raw_output`` holds the value passed
     to the sigmoid.

     Args:
         x: Input batch.

     Returns:
         Sigmoid of ``self.raw_output``.
     """
     first_stage = self.cnn1(x)
     self.lth = first_stage
     pre_activation = self.output(self.cnn2(first_stage))
     self.raw_output = pre_activation
     return rm.sigmoid(pre_activation)
Example #9
0
 def predict(self, x):
     """Predict a class label for every row of ``x``.

     The network output is passed through a sigmoid and thresholded at 0.5;
     the resulting 0/1 value indexes ``self.lb.classes_``.

     Args:
         x: Input batch passed to ``self.network``.

     Returns:
         ``np.ndarray`` holding the selected entries of ``self.lb.classes_``.
     """
     scores = rm.sigmoid(self.network(x)).as_ndarray()
     class_indices = [int(is_positive) for is_positive in scores > 0.5]
     labels = self.lb.classes_
     return np.array([labels[index] for index in class_indices])
Example #10
0
 def encode(self, x):
     """Return the encoded representation of ``x``.

     Two sigmoid-activated encoder layers are applied, followed by
     ``_encodedlayer`` without an activation.

     Args:
         x: Input batch.

     Returns:
         The raw output of ``self._encodedlayer``.
     """
     hidden = x
     for layer in (self._encodelayer1, self._encodelayer2):
         hidden = rm.sigmoid(layer(hidden))
     return self._encodedlayer(hidden)
Example #11
0
 def encode(self, x):
     """Return the encoded representation of ``x``.

     One sigmoid-activated encoder layer is applied, followed by
     ``_encodedlayer`` without an activation.

     Args:
         x: Input batch.

     Returns:
         The raw output of ``self._encodedlayer``.
     """
     hidden = rm.sigmoid(self._encodelayer(x))
     return self._encodedlayer(hidden)
Example #12
0
    def _oper_cpu(cls, output, t, bbox, classes, init_anchors):
        """Compute the YOLO-style detection loss and its hand-written deltas.

        The raw output is reshaped to
        ``(batch, bbox, classes + 5, grid_h, grid_w)`` and split along axis 2
        into x, y, w, h, confidence and class scores. Targets are built from
        the ground-truth boxes in ``t``; a squared-error loss is summed for
        every component and a matching delta array is filled in by hand.

        Args:
            output: Raw network output of shape
                ``(batch, bbox * (classes + 5), grid_h, grid_w)``.
            t: One sequence of ground-truth boxes per batch item. Each box is
                a mapping with the keys ``"x"``, ``"y"``, ``"w"``, ``"h"`` and
                ``"label"``. Coordinates are assumed to be normalised to the
                image size (they are multiplied by the grid size to find the
                cell) -- TODO confirm against the data loader.
            bbox (int): Number of anchor boxes per grid cell.
            classes (int): Number of object classes.
            init_anchors: Sequence of ``[width, height]`` anchor sizes (they
                are divided by the grid size below), or ``None`` to use the
                five built-in anchors.

        Returns:
            The node returned by ``cls._create_node(loss)``. Its
            ``attrs._output`` is ``output`` and its ``attrs._deltas`` is the
            delta array reshaped to
            ``(batch, bbox * (classes + 5), grid_h, grid_w)``.
        """
        batch_size, _, grid_h, grid_w = output.shape
        output_reshape = rm.reshape(
            output, (batch_size, bbox, classes + 5, grid_h, grid_w))
        x = output_reshape[:, :, 0:1, :, :]
        y = output_reshape[:, :, 1:2, :, :]
        w = output_reshape[:, :, 2:3, :, :]
        h = output_reshape[:, :, 3:4, :, :]
        conf = output_reshape[:, :, 4:5, :, :]
        prob = output_reshape[:, :, 5:, :, :]

        x = rm.sigmoid(x)
        y = rm.sigmoid(y)
        conf = rm.sigmoid(conf)
        # Softmax is applied with the class axis at position 1.
        prob = rm.transpose(prob,
                            (0, 2, 1, 3, 4)).reshape(batch_size, classes, -1)
        prob = rm.softmax(prob)
        prob = rm.reshape(prob, (batch_size, classes, bbox, grid_h, grid_w))
        deltas = np.zeros(output_reshape.shape, dtype=np.float32)

        if init_anchors is None:
            anchors = [[5.375, 5.03125], [5.40625, 4.6875], [2.96875, 2.53125],
                       [2.59375, 2.78125], [1.9375, 3.25]]
        else:
            anchors = init_anchors

        # Predictions whose best IoU exceeds this value are not penalised.
        thresh = 0.7

        # Default targets for cells that own no ground-truth box.
        tw = np.ones(w.shape, dtype=np.float32)
        th = np.ones(h.shape, dtype=np.float32)
        tx = np.tile(0.5, x.shape).astype(np.float32)
        ty = np.tile(0.5, y.shape).astype(np.float32)
        box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)

        tconf = np.zeros(conf.shape, dtype=np.float32)
        conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

        tprob = prob.as_ndarray()

        # Per-cell offsets and per-anchor sizes, broadcast to one sample.
        anchor_sizes = np.array(anchors, dtype=np.float32)
        x_shift = np.broadcast_to(np.arange(grid_w, dtype=np.float32),
                                  x.shape[1:])
        y_shift = np.broadcast_to(
            np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
            y.shape[1:])
        w_anchor = np.broadcast_to(
            np.reshape(anchor_sizes[:, 0], (bbox, 1, 1, 1)), w.shape[1:])
        h_anchor = np.broadcast_to(
            np.reshape(anchor_sizes[:, 1], (bbox, 1, 1, 1)), h.shape[1:])

        # For every prediction, find the best IoU against any truth box.
        best_ious = []
        for batch in range(batch_size):
            box_x = (x[batch] + x_shift) / grid_w
            box_y = (y[batch] + y_shift) / grid_h
            box_w = np.exp(w[batch]) * w_anchor / grid_w
            box_h = np.exp(h[batch]) * h_anchor / grid_h
            predicted = Box(box_x, box_y, box_w, box_h)
            ious = []
            for truth in t[batch]:
                truth_box_x = np.broadcast_to(
                    np.array(truth["x"], dtype=np.float32), box_x.shape)
                truth_box_y = np.broadcast_to(
                    np.array(truth["y"], dtype=np.float32), box_y.shape)
                truth_box_w = np.broadcast_to(
                    np.array(truth["w"], dtype=np.float32), box_w.shape)
                truth_box_h = np.broadcast_to(
                    np.array(truth["h"], dtype=np.float32), box_h.shape)
                ious.append(
                    multi_box_iou(
                        predicted,
                        Box(truth_box_x, truth_box_y, truth_box_w,
                            truth_box_h)))
            if ious:
                best_ious.append(np.max(np.array(ious), axis=0))
            else:
                # A sample without truth boxes has no overlap anywhere;
                # np.max over an empty array would raise ValueError.
                best_ious.append(np.zeros(x.shape[1:], dtype=np.float32))
        best_ious = np.array(best_ious)
        confident = best_ious > thresh
        tconf[confident] = conf[confident]
        conf_learning_scale[confident] = 0

        # Assign each truth box to one cell and its best-matching anchor.
        abs_anchors = anchors / np.array([grid_w, grid_h])
        for batch in range(batch_size):
            for truth_box in t[batch]:
                truth_x = float(truth_box["x"])
                truth_y = float(truth_box["y"])
                truth_width = float(truth_box["w"])
                truth_height = float(truth_box["h"])
                # Column comes from x and row from y. They used to be
                # swapped, which indexed the wrong cell and produced wrong
                # x/y targets. The clamp keeps a centre lying exactly on the
                # right/bottom edge inside the grid.
                truth_w = min(int(truth_x * grid_w), grid_w - 1)
                truth_h = min(int(truth_y * grid_h), grid_h - 1)

                # Pick the anchor whose shape overlaps the truth box most.
                truth_n = 0
                best_iou = 0.0
                for anchor_index, abs_anchor in enumerate(abs_anchors):
                    iou = box_iou(
                        Box(0, 0, truth_width, truth_height),
                        Box(0, 0, abs_anchor[0], abs_anchor[1]))
                    if best_iou < iou:
                        best_iou = iou
                        truth_n = anchor_index

                box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
                tx[batch, truth_n, :, truth_h,
                   truth_w] = truth_x * grid_w - truth_w
                ty[batch, truth_n, :, truth_h,
                   truth_w] = truth_y * grid_h - truth_h
                tw[batch, truth_n, :, truth_h,
                   truth_w] = truth_width / abs_anchors[truth_n][0]
                th[batch, truth_n, :, truth_h,
                   truth_w] = truth_height / abs_anchors[truth_n][1]
                tprob[batch, :, truth_n, truth_h, truth_w] = 0
                tprob[batch,
                      int(truth_box["label"]), truth_n, truth_h, truth_w] = 1

                # The confidence target is the IoU of the current prediction
                # with the truth box.
                full_truth_box = Box(truth_x, truth_y, truth_width,
                                     truth_height)
                predicted_box = Box(
                    (x[batch, truth_n, 0, truth_h, truth_w] + truth_w) /
                    grid_w,
                    (y[batch, truth_n, 0, truth_h, truth_w] + truth_h) /
                    grid_h,
                    np.exp(w[batch, truth_n, 0, truth_h, truth_w]) *
                    abs_anchors[truth_n][0],
                    np.exp(h[batch, truth_n, 0, truth_h, truth_w]) *
                    abs_anchors[truth_n][1])
                predicted_iou = box_iou(full_truth_box, predicted_box)
                tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
                conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 5.0

        # Squared-error losses and the matching deltas. The x, y and class
        # deltas carry an extra factor of 10, as in the original code.
        x_loss = np.sum((tx - x)**2 * box_learning_scale) / 2
        deltas[:, :, 0:1, :, :] = ((x - tx) * box_learning_scale *
                                   (1 - x) * x).as_ndarray() * 10
        y_loss = np.sum((ty - y)**2 * box_learning_scale) / 2
        deltas[:, :, 1:2, :, :] = ((y - ty) * box_learning_scale *
                                   (1 - y) * y).as_ndarray() * 10
        w_loss = np.sum((tw - np.exp(w))**2 * box_learning_scale) / 2
        deltas[:, :,
               2:3, :, :] = ((np.exp(w) - tw) * box_learning_scale * np.exp(w))
        h_loss = np.sum((th - np.exp(h))**2 * box_learning_scale) / 2
        deltas[:, :,
               3:4, :, :] = ((np.exp(h) - th) * box_learning_scale * np.exp(h))
        c_loss = np.sum((tconf - conf)**2 * conf_learning_scale) / 2
        deltas[:, :, 4:5, :, :] = ((conf - tconf) * conf_learning_scale *
                                   (1 - conf) * conf).as_ndarray()
        p_loss = np.sum((tprob - prob)**2) / 2
        deltas[:, :, 5:, :, :] = ((
            ((prob - tprob) *
             (1 - prob) * prob)).as_ndarray()).transpose(0, 2, 1, 3, 4) * 10
        if np.isnan(p_loss):
            p_loss = 0
        print(
            "x_loss: %f  y_loss: %f  w_loss: %f  h_loss: %f  c_loss: %f   p_loss: %f"
            % (x_loss, y_loss, w_loss, h_loss, c_loss, p_loss))

        loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
        ret = cls._create_node(loss)
        ret.attrs._output = output
        ret.attrs._deltas = deltas.reshape(batch_size, bbox * (classes + 5),
                                           grid_h, grid_w)
        return ret