def forward(self, x):
    el_out = rm.sigmoid(self._encodelayer(x))
    l = rm.sigmoid(self._encodedlayer(el_out))
    dl_out = rm.sigmoid(self._decodelayer(l))
    g = self._decodedlayer(dl_out)
    loss = rm.mse(g, x)
    return loss
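# Usage sketch for the autoencoder forward() above. The class name AutoEncoder,
# the layer sizes, and the input shape are illustrative assumptions, not taken
# from the source.
import numpy as np
import renom as rm

class AutoEncoder(rm.Model):
    def __init__(self):
        self._encodelayer = rm.Dense(10)      # assumed hidden size
        self._encodedlayer = rm.Dense(2)      # assumed code size
        self._decodelayer = rm.Dense(10)
        self._decodedlayer = rm.Dense(28 * 28)

    def forward(self, x):
        el_out = rm.sigmoid(self._encodelayer(x))
        l = rm.sigmoid(self._encodedlayer(el_out))
        dl_out = rm.sigmoid(self._decodelayer(l))
        return rm.mse(self._decodedlayer(dl_out), x)

x = np.random.rand(32, 28 * 28).astype(np.float32)  # stand-in batch
loss = AutoEncoder()(x)  # __call__ dispatches to forward() and returns the MSE loss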
def forward(self, x): """Performs forward propagation. This function can be called using ``__call__`` method. See following example of method usage. Args: x (ndarray, Node): Input image as an tensor. Returns: (Node): Returns raw output of yolo v1. You can reform it to bounding box form using the method ``get_bbox``. Example: >>> import numpy as np >>> from renom_img.api.detection.yolo_v2 import Yolov2 >>> >>> x = np.random.rand(1, 3, 224, 224) >>> class_map = ["dog", "cat"] >>> model = Yolov2(class_map) >>> y = model.forward(x) # Forward propagation. >>> y = model(x) # Same as above result. >>> >>> bbox = model.get_bbox(y) # The output can be reformed using get_bbox method. """ assert len(self.class_map) > 0, \ "Class map is empty. Please set the attribute class_map when instantiate model class. " +\ "Or, please load already trained model using the method 'load()'." assert self.num_anchor > 0, \ "Anchor list is empty. Please calculate anchor list using create_anchor function, before instantiate model class. " +\ "Or, please load already trained model using the method 'load()'." self._freezed_network.set_auto_update(self.train_whole_network) self._freezed_network.set_models(inference=( not self.train_whole_network or getattr(self, 'inference', False))) h, f = self._freezed_network(x) f = self._conv21(f) h = self._conv1(h) h = self._conv2(rm.concat(h, rm.concat([f[:, :, i::2, j::2] for i in range(2) for j in range(2)]))) out = self._last(h) # Create yolo format. N, C, H, W = h.shape reshaped = out.reshape(N, self.num_anchor, -1, W * H) conf = rm.sigmoid(reshaped[:, :, 0:1]).transpose(0, 2, 1, 3) px = rm.sigmoid(reshaped[:, :, 1:2]).transpose(0, 2, 1, 3) py = rm.sigmoid(reshaped[:, :, 2:3]).transpose(0, 2, 1, 3) pw = rm.exp(reshaped[:, :, 3:4]).transpose(0, 2, 1, 3) ph = rm.exp(reshaped[:, :, 4:5]).transpose(0, 2, 1, 3) cl = rm.softmax(reshaped[:, :, 5:].transpose(0, 2, 1, 3)) return rm.concat(conf, px, py, pw, ph, cl).transpose(0, 2, 1, 3).reshape(N, -1, H, W)
def test_gpu_node_sigmoid(a):
    set_cuda_active(True)

    g1 = Variable(a)
    g3 = rm.sum(rm.sigmoid(g1))
    g = g3.grad()
    g_g1 = g.get(g1)
    g3.to_cpu()

    set_cuda_active(False)

    c3 = rm.sum(rm.sigmoid(g1))
    c = c3.grad()
    c_g1 = c.get(g1)

    close(g3, c3)
    close(c_g1, g_g1)
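# The close() helper used above is not shown; a plausible definition is a loose
# numerical comparison between the GPU and CPU results. This is an assumption
# about the test suite, not its actual implementation.
import numpy as np

def close(a, b, rtol=1e-3, atol=1e-5):
    # Compare two array-like results (e.g. renom Nodes) within tolerance.
    assert np.allclose(np.array(a), np.array(b), rtol=rtol, atol=atol)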
def forward(self, x):
    el1_out = rm.sigmoid(self._encodelayer1(x))
    el2_out = rm.sigmoid(self._encodelayer2(el1_out))
    el3_out = rm.sigmoid(self._encodelayer3(el2_out))
    l = rm.sigmoid(self._encodedlayer(el3_out))
    dl1_out = rm.sigmoid(self._decodelayer1(l))
    dl2_out = rm.sigmoid(self._decodelayer2(dl1_out))
    dl3_out = rm.sigmoid(self._decodelayer3(dl2_out))
    g = self._decodedlayer(dl3_out)
    loss = rm.mse(g, x)
    return loss
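# A minimal training-loop sketch for an autoencoder whose forward() returns the
# loss directly, as above. The model definition, layer sizes, stand-in data,
# and learning rate are all assumptions for illustration.
import numpy as np
import renom as rm

class TinyAE(rm.Model):
    def __init__(self):
        self._enc = rm.Dense(100)
        self._dec = rm.Dense(28 * 28)

    def forward(self, x):
        return rm.mse(self._dec(rm.sigmoid(self._enc(x))), x)

model = TinyAE()
optimizer = rm.Sgd(lr=0.01)
for epoch in range(10):
    batch = np.random.rand(64, 28 * 28).astype(np.float32)  # stand-in data
    with model.train():
        loss = model(batch)          # forward() returns the reconstruction loss
    loss.grad().update(optimizer)    # backprop and parameter update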
def yolo_predict(model, input_x):
    output = model(input_x)
    batch_size, _, grid_h, grid_w = output.shape
    output_reshape = np.reshape(output, (batch_size, model.bbox, model.classes + 5, grid_h, grid_w))
    x, y, w, h, conf, prob = output_reshape[:, :, 0:1, :, :], output_reshape[:, :, 1:2, :, :], \
        output_reshape[:, :, 2:3, :, :], output_reshape[:, :, 3:4, :, :], \
        output_reshape[:, :, 4:5, :, :], output_reshape[:, :, 5:, :, :]
    x = rm.sigmoid(x)        # activation of x
    y = rm.sigmoid(y)        # activation of y
    conf = rm.sigmoid(conf)  # activation of conf
    prob = np.transpose(prob, (0, 2, 1, 3, 4))
    prob = rm.softmax(prob)  # activation of probability
    prob = np.transpose(prob, (0, 2, 1, 3, 4))

    # Convert x, y, w, h to absolute coordinates.
    x_shift = np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape)
    y_shift = np.broadcast_to(np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1), y.shape)
    w_anchor = np.broadcast_to(np.reshape(
        np.array(model.anchors, dtype=np.float32)[:, 0], (model.bbox, 1, 1, 1)), w.shape)
    h_anchor = np.broadcast_to(np.reshape(
        np.array(model.anchors, dtype=np.float32)[:, 1], (model.bbox, 1, 1, 1)), h.shape)
    box_x = (x + x_shift) / grid_w
    box_y = (y + y_shift) / grid_h
    box_w = np.exp(w) * w_anchor / grid_w
    box_h = np.exp(h) * h_anchor / grid_h

    return box_x, box_y, box_w, box_h, conf, prob
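# Worked example of the decoding math above for a single cell (all numbers
# assumed): with sigmoid(x) = 0.3 in grid column 4 of a 13-wide grid, the
# normalized box center is (0.3 + 4) / 13; with exp(w) = 1.2 and an anchor
# width of 2.5 cells, the normalized box width is 1.2 * 2.5 / 13.
box_x = (0.3 + 4) / 13  # ~0.3308 of image width
box_w = 1.2 * 2.5 / 13  # ~0.2308 of image width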
def forward(self, x):
    h = self.transform(x)
    h = rm.reshape(h, (len(x), self.channels, self.dim, self.dim))
    layers = self.hidden._layers
    # With batch normalization the layers alternate (layer, batch-norm),
    # so only half as many iterations are needed.
    n = len(layers) // 2 if self.batch_normal else len(layers)
    for i in range(n):
        if self.batch_normal:
            h = layers[2 * i](h)
            h = rm.relu(layers[2 * i + 1](h))
        else:
            h = rm.relu(layers[i](h))
    h = rm.sigmoid(self.output(h))
    return h
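# A sketch of how self.hidden could be built so the 2*i / 2*i+1 indexing in
# forward() lines up when batch normalization is on. The layer types and
# channel counts are assumptions, not from the source.
import renom as rm

def build_hidden(channel_list, batch_normal):
    layers = []
    for ch in channel_list:
        layers.append(rm.Deconv2d(channel=ch, stride=2))  # layers[2*i]
        if batch_normal:
            layers.append(rm.BatchNormalize())            # layers[2*i + 1]
    return rm.Sequential(layers)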
def forward(self, x): print("") print("input:\n{}".format(x)) print("input shape:{}".format(x.shape)) print("") t1 = self.layer1(x) print("input x hidden weight:\n{}".format(self.layer1.params.w)) print("input x hidden bias:\n{}".format(self.layer1.params.b)) print("") print("hidden:\n{}".format(t1)) print("hidden shape:{}".format(t1.shape)) t2 = rm.sigmoid(t1) print("") print("relu:\n{}".format(t2)) print("relu shape:{}".format(t2.shape)) print("") t3 = self.layer2(t2) print("hidden x output weight:\n{}".format(self.layer2.params.w)) print("hidden x output bias:\n{}".format(self.layer2.params.b)) print("") print("output:\n{}".format(t3)) print("output shape:{}".format(t3.shape)) print("") return t3
def forward(self, x):
    self.lth = self.cnn1(x)
    hidden = self.cnn2(self.lth)
    self.raw_output = self.output(hidden)
    return rm.sigmoid(self.raw_output)
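# The final sigmoid maps raw logits to (0, 1); a common follow-up is
# thresholding at 0.5 for binary decisions. The values below are stand-ins,
# not outputs from the source model.
import numpy as np

probs = np.array([0.12, 0.97, 0.51])     # stand-in for the sigmoid output
labels = (probs > 0.5).astype(np.int32)  # -> [0, 1, 1]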
def predict(self, x):
    output_network = list(map(int, rm.sigmoid(self.network(x)).as_ndarray() > 0.5))
    result_array = [self.lb.classes_[output] for output in output_network]
    return np.array(result_array)
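# How self.lb is presumably prepared: scikit-learn's LabelBinarizer is an
# assumption consistent with the use of lb.classes_ in predict() above.
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
y = lb.fit_transform(["spam", "ham", "spam"])  # binary 0/1 targets
print(lb.classes_)  # ['ham' 'spam']; indices 0/1 match the thresholded output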
def encode(self, x):
    el1_out = rm.sigmoid(self._encodelayer1(x))
    el2_out = rm.sigmoid(self._encodelayer2(el1_out))
    l = self._encodedlayer(el2_out)
    return l
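# Typical use of encode(): project inputs into the low-dimensional code layer
# for feature extraction or visualization. The Encoder stand-in below reuses
# the attribute names from above; all sizes are illustrative assumptions.
import numpy as np
import renom as rm

class Encoder(rm.Model):
    def __init__(self):
        self._encodelayer1 = rm.Dense(64)
        self._encodelayer2 = rm.Dense(16)
        self._encodedlayer = rm.Dense(2)

    def encode(self, x):
        el1_out = rm.sigmoid(self._encodelayer1(x))
        el2_out = rm.sigmoid(self._encodelayer2(el1_out))
        return self._encodedlayer(el2_out)

codes = Encoder().encode(np.random.rand(10, 28 * 28).astype(np.float32))
print(codes.shape)  # (10, 2): one 2-D code per input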
def encode(self, x):
    el_out = rm.sigmoid(self._encodelayer(x))
    l = self._encodedlayer(el_out)
    return l
def _oper_cpu(cls, output, t, bbox, classes, init_anchors):
    batch_size, _, grid_h, grid_w = output.shape
    output_reshape = rm.reshape(output, (batch_size, bbox, classes + 5, grid_h, grid_w))
    x, y, w, h, conf, prob = output_reshape[:, :, 0:1, :, :], output_reshape[:, :, 1:2, :, :], \
        output_reshape[:, :, 2:3, :, :], output_reshape[:, :, 3:4, :, :], \
        output_reshape[:, :, 4:5, :, :], output_reshape[:, :, 5:, :, :]
    x = rm.sigmoid(x)
    y = rm.sigmoid(y)
    conf = rm.sigmoid(conf)
    prob = rm.transpose(prob, (0, 2, 1, 3, 4)).reshape(batch_size, classes, -1)
    prob = rm.softmax(prob)
    prob = rm.reshape(prob, (batch_size, classes, bbox, grid_h, grid_w))
    deltas = np.zeros(output_reshape.shape, dtype=np.float32)

    # Anchors.
    if init_anchors is None:
        anchors = [[5.375, 5.03125], [5.40625, 4.6875], [2.96875, 2.53125],
                   [2.59375, 2.78125], [1.9375, 3.25]]
    else:
        anchors = init_anchors

    thresh = 0.7

    # Training targets (teacher data).
    tw = np.ones(w.shape, dtype=np.float32)
    th = np.ones(h.shape, dtype=np.float32)
    tx = np.tile(0.5, x.shape).astype(np.float32)
    ty = np.tile(0.5, y.shape).astype(np.float32)
    box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
    tconf = np.zeros(conf.shape, dtype=np.float32)
    conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)
    tprob = prob.as_ndarray()

    x_shift = np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:])
    y_shift = np.broadcast_to(
        np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1), y.shape[1:])
    w_anchor = np.broadcast_to(np.reshape(
        np.array(anchors, dtype=np.float32)[:, 0], (bbox, 1, 1, 1)), w.shape[1:])
    h_anchor = np.broadcast_to(np.reshape(
        np.array(anchors, dtype=np.float32)[:, 1], (bbox, 1, 1, 1)), h.shape[1:])

    best_ious = []
    for batch in range(batch_size):
        truth_bbox = len(t[batch])
        box_x = (x[batch] + x_shift) / grid_w
        box_y = (y[batch] + y_shift) / grid_h
        box_w = np.exp(w[batch]) * w_anchor / grid_w
        box_h = np.exp(h[batch]) * h_anchor / grid_h
        ious = []
        for truth_index in range(truth_bbox):
            truth_box_x = np.broadcast_to(
                np.array(t[batch][truth_index]["x"], dtype=np.float32), box_x.shape)
            truth_box_y = np.broadcast_to(
                np.array(t[batch][truth_index]["y"], dtype=np.float32), box_y.shape)
            truth_box_w = np.broadcast_to(
                np.array(t[batch][truth_index]["w"], dtype=np.float32), box_w.shape)
            truth_box_h = np.broadcast_to(
                np.array(t[batch][truth_index]["h"], dtype=np.float32), box_h.shape)
            ious.append(multi_box_iou(
                Box(box_x, box_y, box_w, box_h),
                Box(truth_box_x, truth_box_y, truth_box_w, truth_box_h)))
        ious = np.array(ious)
        best_ious.append(np.max(ious, axis=0))
    best_ious = np.array(best_ious)

    # Boxes overlapping a ground truth above the threshold keep their current
    # confidence and receive no confidence gradient.
    tconf[best_ious > thresh] = conf[best_ious > thresh]
    conf_learning_scale[best_ious > thresh] = 0

    abs_anchors = anchors / np.array([grid_w, grid_h])
    for batch in range(batch_size):
        for truth_box in t[batch]:
            # Grid cell containing the ground-truth center
            # (column index from x, row index from y).
            truth_w = int(float(truth_box["x"]) * grid_w)
            truth_h = int(float(truth_box["y"]) * grid_h)
            truth_n = 0
            best_iou = 0.0
            for anchor_index, abs_anchor in enumerate(abs_anchors):
                iou = box_iou(
                    Box(0, 0, float(truth_box["w"]), float(truth_box["h"])),
                    Box(0, 0, abs_anchor[0], abs_anchor[1]))
                if best_iou < iou:
                    best_iou = iou
                    truth_n = anchor_index
            box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
            tx[batch, truth_n, :, truth_h, truth_w] = float(truth_box["x"]) * grid_w - truth_w
            ty[batch, truth_n, :, truth_h, truth_w] = float(truth_box["y"]) * grid_h - truth_h
            tw[batch, truth_n, :, truth_h, truth_w] = float(truth_box["w"]) / abs_anchors[truth_n][0]
            th[batch, truth_n, :, truth_h, truth_w] = float(truth_box["h"]) / abs_anchors[truth_n][1]
            tprob[batch, :, truth_n, truth_h, truth_w] = 0
            tprob[batch, int(truth_box["label"]), truth_n, truth_h, truth_w] = 1
            full_truth_box = Box(float(truth_box["x"]), float(truth_box["y"]),
                                 float(truth_box["w"]), float(truth_box["h"]))
            predicted_box = Box(
                (x[batch, truth_n, 0, truth_h, truth_w] + truth_w) / grid_w,
                (y[batch, truth_n, 0, truth_h, truth_w] + truth_h) / grid_h,
                np.exp(w[batch, truth_n, 0, truth_h, truth_w]) * abs_anchors[truth_n][0],
                np.exp(h[batch, truth_n, 0, truth_h, truth_w]) * abs_anchors[truth_n][1])
            predicted_iou = box_iou(full_truth_box, predicted_box)
            tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
            conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 5.0

    # Loss terms and hand-computed gradients; the deltas fold in the
    # derivatives of the sigmoid (x(1-x)) and exp activations.
    x_loss = np.sum((tx - x) ** 2 * box_learning_scale) / 2
    deltas[:, :, 0:1, :, :] = ((x - tx) * box_learning_scale * (1 - x) * x).as_ndarray() * 10
    y_loss = np.sum((ty - y) ** 2 * box_learning_scale) / 2
    deltas[:, :, 1:2, :, :] = ((y - ty) * box_learning_scale * (1 - y) * y).as_ndarray() * 10
    w_loss = np.sum((tw - np.exp(w)) ** 2 * box_learning_scale) / 2
    deltas[:, :, 2:3, :, :] = (np.exp(w) - tw) * box_learning_scale * np.exp(w)
    h_loss = np.sum((th - np.exp(h)) ** 2 * box_learning_scale) / 2
    deltas[:, :, 3:4, :, :] = (np.exp(h) - th) * box_learning_scale * np.exp(h)
    c_loss = np.sum((tconf - conf) ** 2 * conf_learning_scale) / 2
    deltas[:, :, 4:5, :, :] = ((conf - tconf) * conf_learning_scale * (1 - conf) * conf).as_ndarray()
    p_loss = np.sum((tprob - prob) ** 2) / 2
    deltas[:, :, 5:, :, :] = \
        ((prob - tprob) * (1 - prob) * prob).as_ndarray().transpose(0, 2, 1, 3, 4) * 10
    if np.isnan(p_loss):
        p_loss = 0

    print("x_loss: %f y_loss: %f w_loss: %f h_loss: %f c_loss: %f p_loss: %f"
          % (x_loss, y_loss, w_loss, h_loss, c_loss, p_loss))

    loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
    ret = cls._create_node(loss)
    ret.attrs._output = output
    ret.attrs._deltas = deltas.reshape(batch_size, bbox * (classes + 5), grid_h, grid_w)
    return ret