def test_gpu_node_deconvolution2d(a):
    """Check that ``rm.Deconv2d`` gives matching results with and without CUDA.

    The layer output, its sum, and the gradients with respect to the weight,
    the bias and the input are computed once inside ``use_cuda()`` and once
    more afterwards, then compared pairwise with ``close``.

    Args:
        a: Array wrapped in a ``Variable`` and used as the layer input.
    """
    with use_cuda():
        layer = rm.Deconv2d(channel=32)
        layer.params["w"] = rm.Variable(np.random.rand(3, 32, 3, 3))
        layer.params["b"] = rm.Variable(np.random.rand(1, 32, 1, 1))

        g1 = Variable(a)
        g2 = layer(g1)
        g3 = rm.sum(g2)
        g = g3.grad()
        g_g1 = g.get(layer.params["w"])
        g_g2 = g.get(layer.params["b"])
        g_g3 = g.get(g1)
        g2.to_cpu()
        g3.to_cpu()

    # NOTE(review): the second pass is assumed to run outside ``use_cuda()``
    # so that it is the CPU reference - confirm against the original layout.
    c2 = layer(g1)
    c3 = rm.sum(c2)
    c = c3.grad()
    c_g1 = c.get(layer.params["w"])
    c_g2 = c.get(layer.params["b"])
    # Read the input gradient from the second pass (``c``); reading it from
    # ``g`` again would make the last comparison below trivially true.
    c_g3 = c.get(g1)

    close(g2, c2)
    close(g3, c3)
    close(c_g1, g_g1)
    close(c_g2, g_g2)
    close(c_g3, g_g3)
def __init__(self, num_class=1000):
    """Create the Darknet19 base and a 1x1 convolution output layer.

    Args:
        num_class: Channel count of the final 1x1 convolution.
    """
    self._num_class = num_class
    self._base = Darknet19Base()
    self._last = rm.Conv2d(num_class, filter=1)

    head = self._last
    # The weight is initialised before the bias, as in the dict literal
    # this replaces, so any random draws happen in the same order.
    weight = rm.Variable(
        head._initializer((num_class, 1024, 1, 1)), auto_update=True)
    bias = rm.Variable(
        head._initializer((1, num_class, 1, 1)), auto_update=False)
    head.params = {"w": weight, "b": bias}
def __init__(self, channel, filter=3, prev_ch=None):
    """Create a convolution followed by a feature-mode batch normalisation.

    Args:
        channel: Number of output channels of the convolution.
        filter: Kernel size; the padding is ``int((filter - 1) / 2)``.
        prev_ch: Number of input channels. When given, the convolution
            parameters are created immediately with that shape; otherwise
            they are left for the layer to create.
    """
    pad = int((filter - 1) / 2)
    self._conv = rm.Conv2d(channel=channel, filter=filter, padding=pad)

    if prev_ch is not None:
        conv = self._conv
        kernel_shape = (channel, prev_ch, filter, filter)
        weight = rm.Variable(conv._initializer(kernel_shape), auto_update=True)
        bias = rm.Variable(
            np.zeros((1, channel, 1, 1), dtype=np.float32), auto_update=False)
        conv.params = {"w": weight, "b": bias}

    self._bn = rm.BatchNormalize(mode='feature', momentum=0.99)
def prof_add():
    """Time 1000 evaluations of ``a + b * c`` on Variables and on ndarrays.

    CUDA is switched on first. The same float32 random data is used for the
    ``rm.Variable`` run and for the plain NumPy run, and the elapsed time of
    each run is printed.
    """
    cuda.set_cuda_active(True)

    shapes = ((1000, 1000), (1000, 1000), (1, 1000))
    a, b, c = [np.random.rand(*shape).astype(np.float32) for shape in shapes]
    var_a, var_b, var_c = rm.Variable(a), rm.Variable(b), rm.Variable(c)

    repeats = 1000

    began = time.time()
    for _ in range(repeats):
        var_a + var_b * var_c
    print("took time %f" % (time.time() - began))

    began = time.time()
    for _ in range(repeats):
        a + b * c
    print("took time %f" % (time.time() - began))
def __init__(self, class_map=None, anchor=None, imsize=(320, 320),
             load_pretrained_weight=False, train_whole_network=False):
    """Set up the Yolo v2 model on top of a Darknet19 feature extractor.

    Args:
        class_map: Forwarded unchanged to the parent constructor.
        anchor: When this is an ``AnchorYolov2`` its ``anchor`` and
            ``imsize`` attributes are used; otherwise an empty anchor list
            and ``imsize`` are used. ``len(anchor)`` gives the anchor count
            unless ``anchor`` is None.
        imsize: Two values, each of which must be a multiple of 32.
        load_pretrained_weight: Forwarded unchanged to the parent
            constructor.
        train_whole_network: Forwarded unchanged to the parent constructor.

    Raises:
        AssertionError: If either ``imsize`` value is not a multiple of 32.
    """
    # The message is built from adjacent literals: a backslash continuation
    # inside one literal leaks the continuation whitespace into the text.
    assert (imsize[0] / 32.) % 1 == 0 and (imsize[1] / 32.) % 1 == 0, (
        "Yolo v2 only accepts an 'imsize' argument whose values are "
        "multiples of 32, e.g. imsize=(320, 320)."
    )

    self.flag = False  # Used to modify the loss function.
    self.global_counter = 0
    self.anchor = [] if not isinstance(anchor, AnchorYolov2) else anchor.anchor
    self.anchor_size = imsize if not isinstance(anchor, AnchorYolov2) else anchor.imsize
    self.num_anchor = 0 if anchor is None else len(anchor)

    darknet = Darknet19(1)
    self._opt = rm.Sgd(0.001, 0.9)

    super(Yolov2, self).__init__(class_map, imsize,
                                 load_pretrained_weight, train_whole_network, darknet)

    # Initialize trainable layers.
    last_channel = (self.num_class + 5) * self.num_anchor
    self._conv1 = rm.Sequential([
        DarknetConv2dBN(channel=1024, prev_ch=1024),
        DarknetConv2dBN(channel=1024, prev_ch=1024),
    ])
    self._conv21 = DarknetConv2dBN(channel=64, prev_ch=512, filter=1)
    self._conv2 = DarknetConv2dBN(channel=1024, prev_ch=1024 + 256)
    self._last = rm.Conv2d(channel=last_channel, filter=1)
    self._freezed_network = darknet._base

    # Re-create the parameters of every convolution / batch-normalisation
    # layer that already has some. The two layer kinds differ only in
    # whether the bias is marked for automatic update.
    for model in [self._conv21, self._conv1, self._conv2]:
        for layer in model.iter_models():
            if not layer.params:
                continue
            if isinstance(layer, rm.Conv2d):
                bias_auto_update = False
            elif isinstance(layer, rm.BatchNormalize):
                bias_auto_update = True
            else:
                continue
            layer.params = {
                "w": rm.Variable(layer._initializer(layer.params.w.shape),
                                 auto_update=True),
                "b": rm.Variable(np.zeros_like(layer.params.b),
                                 auto_update=bias_auto_update),
            }
def exp_dense():
    """Fit a two-layer dense network to random data for 500 steps.

    CUDA is switched off and the NumPy seed is fixed to 10. Each step prints
    the loss; the total elapsed time is printed at the end.
    """
    np.random.seed(10)
    cuda.set_cuda_active(False)

    inputs = np.random.rand(32, 320).astype(np.float32)
    targets = np.random.rand(32, 80).astype(np.float32)
    hidden_layer = rm.Dense(input_size=320, output_size=100)
    output_layer = rm.Dense(input_size=100, output_size=80)
    x_var = rm.Variable(inputs, auto_update=False)
    t_var = rm.Variable(targets, auto_update=False)
    optimizer = Sgd(0.01, momentum=0.3)

    began = time.time()
    for _ in range(500):
        hidden = rm.Sigmoid(hidden_layer(x_var))
        residual = output_layer(hidden) - t_var
        loss = rm.Sum(residual**2) / 32
        loss.ensure_cpu()
        print(loss)
        loss.grad().update(optimizer)
    print(time.time() - began)
def exp_convolution2():
    """Fit a single Conv2d layer to random data for 100000 steps.

    CUDA is switched on and the NumPy seed is fixed to 10. Each step prints
    the loss and then drops it; the elapsed time is printed at the end.
    """
    np.random.seed(10)
    cuda.set_cuda_active(True)

    inputs = np.random.randn(8, 3, 12, 12).astype(np.float32)
    targets = np.random.randn(8, 16, 10, 10).astype(np.float32)
    conv = rm.Conv2d(channel=16, input_size=inputs.shape[1:])
    x_var = rm.Variable(inputs, auto_update=False)
    t_var = rm.Variable(targets, auto_update=False)
    optimizer = Sgd(0.001, momentum=0.3)

    began = time.time()
    for _ in range(100000):
        loss = rm.Sum((rm.Sigmoid(conv(x_var)) - t_var)**2) / 8
        loss.ensure_cpu()
        print(loss)
        gradients = loss.grad()
        gradients.update(optimizer)
        # Drop the reference explicitly before the next iteration.
        del loss
    print(time.time() - began)
def __init__(self, num_class=1000, load_pretrained_weight=False):
    """Create the Darknet19 network and optionally load stored weights.

    Args:
        num_class: Channel count of the final 1x1 convolution.
        load_pretrained_weight: Falsy to skip loading. ``True`` loads from
            ``<class name>.h5``; any other truthy value is used as the
            file path. A missing file is first fetched with ``download``
            from ``self.WEIGHT_URL``.
    """
    self._num_class = num_class
    self._base = Darknet19Base()
    self._last = rm.Conv2d(num_class, filter=1)

    head = self._last
    weight = rm.Variable(
        head._initializer((num_class, 1024, 1, 1)), auto_update=True)
    bias = rm.Variable(
        head._initializer((1, num_class, 1, 1)), auto_update=False)
    head.params = {"w": weight, "b": bias}

    super(Darknet19, self).__init__()

    if not load_pretrained_weight:
        return

    weight_path = load_pretrained_weight
    if isinstance(weight_path, bool):
        weight_path = self.__class__.__name__ + '.h5'
    if not os.path.exists(weight_path):
        download(self.WEIGHT_URL, weight_path)
    self.load(weight_path)
def exp_convolution1():
    """Fit two stacked Conv2d layers to random data for 100 steps.

    CUDA is switched on and the NumPy seed is fixed to 10. The loss is
    printed after every update and the elapsed time at the end.
    """
    np.random.seed(10)
    # Original author's note: caused by CUDNN_CONVOLUTION_FWD_ALGO_GEMM
    # not being deterministic.
    # 1724.07080078 GPU
    # 1715.86767578 CPU
    cuda.set_cuda_active(True)

    inputs = np.random.randn(8 * 2, 64, 32, 32).astype(np.float32)
    targets = np.random.randn(8 * 2, 32, 28, 28).astype(np.float32)
    first_conv = rm.Conv2d(channel=32, input_size=inputs.shape[1:])
    second_conv = rm.Conv2d(channel=32, input_size=(32, 30, 30))
    x_var = rm.Variable(inputs, auto_update=False)
    t_var = rm.Variable(targets, auto_update=False)
    optimizer = Sgd(0.0001, momentum=0.0)

    began = time.time()
    for _ in range(100):
        # NOTE(review): the divisor is 8 although the leading dimension of
        # the data is 8 * 2 - confirm this is intended.
        loss = rm.Sum((second_conv(rm.Relu(first_conv(x_var))) - t_var)**2) / 8
        loss.ensure_cpu()
        loss.grad().update(optimizer)
        print(loss)
    print(time.time() - began)
def __call__(self, orig_img):
    """Run detection on one image and return the NMS-filtered results.

    Args:
        orig_img: Image array whose ``shape`` unpacks into three values;
            the first two are used as the original height and width.

    Returns:
        Whatever ``nms`` returns for the list of detections and
        ``self.iou_thresh``. Each detection is a dict with the keys
        ``"label"``, ``"probs"``, ``"conf"``, ``"objectness"`` and
        ``"box"``.
    """
    orig_input_height, orig_input_width, _ = orig_img.shape

    img = reshape_to_yolo_size(orig_img)
    img = np.asarray(img, dtype=np.float32) / 255.0
    img = img.transpose(2, 0, 1)
    x_data = img[np.newaxis, :, :, :]
    x = rm.Variable(x_data)

    x, y, w, h, conf, prob = yolo_predict(self.model, x)
    _, _, _, grid_h, grid_w = x.shape
    grid_shape = (self.bbox, grid_h, grid_w)
    x = np.reshape(x, grid_shape)
    y = np.reshape(y, grid_shape)
    w = np.reshape(w, grid_shape)
    h = np.reshape(h, grid_shape)
    conf = np.reshape(conf, grid_shape)
    prob = np.transpose(
        np.reshape(prob, (self.bbox, self.classes, grid_h, grid_w)),
        (1, 0, 2, 3))
    detected_indices = (conf * prob).max(axis=0) > self.detection_thresh

    # Apply the mask once per array instead of once per detection and
    # field: the selections do not depend on the loop variable.
    det_probs = prob.transpose(1, 2, 3, 0)[detected_indices]
    det_conf = conf[detected_indices]
    det_x = x[detected_indices]
    det_y = y[detected_indices]
    det_w = w[detected_indices]
    det_h = h[detected_indices]

    results = []
    for probs, confidence, bx, by, bw, bh in zip(
            det_probs, det_conf, det_x, det_y, det_w, det_h):
        results.append({
            "label": self.labels[probs.argmax()],
            "probs": probs,
            "conf": confidence,
            "objectness": confidence * probs.max(),
            "box": Box(bx * orig_input_width,
                       by * orig_input_height,
                       bw * orig_input_width,
                       bh * orig_input_height).crop_region(
                           orig_input_height, orig_input_width),
        })

    # nms
    nms_results = nms(results, self.iou_thresh)
    return nms_results
def _test_binop(tmpdir, f, name):
    """Export ``f(x, arg)`` through ``_run_onnx`` and check its first node.

    Args:
        tmpdir: Passed through to ``_run_onnx``.
        f: Two-argument callable applied to the model input and a fixed
            random ``rm.Variable``.
        name: Expected ``op_type`` of the first node in the exported graph.
    """
    arg = rm.Variable(np.random.random((2, 2)))

    class Model(rm.Model):
        def forward(self, x):
            return f(x, arg)

    dummy_input = np.random.random((2, 2))
    exported = _run_onnx(tmpdir, Model(), dummy_input)

    # check operator type
    first_node = exported.graph.node[0]
    assert first_node.op_type == name

    lhs_name, rhs_name = first_node.input

    # the right-hand operand must be stored as an initializer equal to arg
    initializers = load_initializer(exported.graph.initializer)
    _test_initializer(initializers, rhs_name, arg)

    # the left-hand operand should never have an initializer
    assert lhs_name not in initializers
def export_onnx(name, model, x, path, printtext=False):
    """
    Export a model to an ONNX file.

    The model is called once on ``x`` inside ``model.train()`` with an
    ``_OnnxHook`` installed. The nodes reachable from the result are then
    ordered so that parents come before children, registered one by one,
    and written to ``path`` as a serialized ONNX model.

    Args:
        name(str): The name of computational graph.
        model(Model): Neural Network Model
        x(ndarray): Dummy input for building a computational graph.
        path(str): The onnx file path to which the model will be exported.
        printtext(bool): If True is given, the generated ONNX model is
            printed.
    """
    OBJNAMES.clear()
    if not isinstance(x, renom.Variable):
        x = renom.Variable(x)

    hook = _OnnxHook()
    renom.Model.set_hook(hook)
    renom.Node.set_hook(hook)

    try:
        with model.train():
            ret = model(x)
    finally:
        # NOTE(review): only the Model hook is cleared here; the Node hook
        # installed above stays in place - confirm whether that is intended.
        renom.Model.set_hook(None)

    parent_nodes = collections.defaultdict(set)
    child_nodes = collections.defaultdict(set)

    nodes = _IdDict()
    roots = _IdDict()

    # build tree (breadth-first from the output; deque gives O(1) pops)
    pending = collections.deque([ret])
    while pending:
        node = pending.popleft()
        if not isinstance(node, renom.Node):
            continue

        parents = [n for n in node._get_graph() if isinstance(n, renom.Node)]
        if not parents:
            roots.add(node)

        pending.extend(parents)
        nodes.add(node)

        for parent in parents:
            nodes.add(parent)
            parent_nodes[id(node)].add(id(parent))
            child_nodes[id(parent)].add(id(node))

    # sort tree: a node becomes ready once all of its parents are emitted
    ordered = []
    ready = collections.deque(roots.values())
    while ready:
        node = ready.popleft()
        ordered.append(node)

        for child in child_nodes[id(node)]:
            parents = parent_nodes[child]
            parents.remove(id(node))
            if not parents:
                ready.append(nodes[child])

    # extract params
    OBJNAMES[id(x)] = 'input'
    inputs = _NamedIdDict()
    inputs.add('input', x)

    OBJNAMES[id(ret)] = 'output'
    outputs = _NamedIdDict()
    outputs.add('output', ret)

    onnx_nodes = []
    values = _NamedIdDict()
    for node in ordered:
        if node is not x:
            _register_node(onnx_nodes, inputs, outputs, values, node)

    # The graph input itself must not be emitted as an initializer.
    if id(x) in values:
        del values[id(x)]

    inputs = [_value_info(v) for v in inputs.values()]
    outputs = [_value_info(v) for v in outputs.values()]

    for v in values.values():
        if isinstance(v, renom.Node):
            v.to_cpu()

    initializers = [
        onnx.numpy_helper.from_array(v, _to_param_name(v))
        for v in values.values()
    ]

    onnx_graph = onnx.helper.make_graph(
        onnx_nodes, name, inputs, outputs, initializer=initializers)

    # Kept in its own name so the ``model`` argument is not rebound.
    onnx_model = onnx.helper.make_model(
        onnx_graph,
        producer_name='renom',
        producer_version=renom.__version__)

    with open(path, 'wb') as f:
        f.write(onnx_model.SerializeToString())

    if printtext:
        print(onnx_model)
def __init__(self):
    """Register two parameters, ``value1`` and ``value2``, each [1, 2, 3, 4]."""
    super(NN, self).__init__()
    # Each parameter gets its own freshly built array.
    for param_name in ("value1", "value2"):
        setattr(self.params, param_name,
                rm.Variable(np.array([1., 2., 3., 4.])))
def forward(self, x, y=None, eps=1e-3):
    """Encode and decode ``x`` and store the loss terms on ``self``.

    Depending on ``self.mode`` the encoder returns either ``qzx`` alone or
    the pair ``(qzx, qyx)``. The reconstruction, the discriminator outputs,
    the loss terms and the count statistics are all stored as attributes;
    only the reconstruction is returned.

    Args:
        x: Input data. ``len(x)`` is used as the batch size.
        y: Label data (original note: one-hot labels for the categorical
            distribution). Used in the 'clustering', 'reduction' and
            'incorp_label' modes.
        eps: Small constant added inside every ``rm.log`` call.

    Returns:
        ``self.recon``, the decoder output.
    """
    # self.qzx : style z (original author's note)
    nb = len(x)

    # --- encoding phase ---
    # NOTE(review): this noise branch is disabled by ``if 0``; it also
    # assigns ``self._x`` rather than the ``_x`` that is read below.
    if 0:
        noise = random.randn(x.size).reshape(nb, x.shape[1])*0.03
        self._x = x+noise
    else:
        _x = x
    if self.mode=='clustering' or self.mode=='reduction':
        self.qzx, self.qyx = self.enc(_x)
    else:
        self.qzx = self.enc(_x)

    # --- decoding/reconstruction phase ---
    if self.mode=='clustering' or self.mode=='reduction':
        self.recon = self.dec(rm.concat(self.qzx, self.qyx))
    else:
        self.recon = self.dec(self.qzx)

    # --- regularization phase ---
    if self.mode == 'incorp_label':
        self._set_incorpdist(x)
    else:
        self._set_distribution(x)
    # The string statements below are no-ops kept from the original.
    # NOTE(review): 'dim_reduction' here vs 'reduction' everywhere else -
    # confirm which mode name is intended.
    if self.mode == 'clustering':
        "categorical dist"
    elif self.mode == 'supervised':
        ""
    elif self.mode == 'dim_reduction':
        ""

    if self.mode == 'incorp_label':
        # presumably sets the attributes read further down (self.real,
        # self.fake, self.fake2pos, self.Dpz, self.Dqzx) - TODO confirm
        self._incorp_label(x, y, eps=eps)
    else:
        self.Dpz = self.dis(self.pz)
        self.Dqzx = self.dis(self.qzx)
        self.real = -rm.sum(rm.log( self.Dpz + eps ))/nb
        self.fake = -rm.sum(rm.log( 1 - self.Dqzx + eps ))/nb
        self.fake2pos = -rm.sum(rm.log( self.Dqzx + eps ))/nb
    if self.mode=='clustering' or self.mode=='reduction':
        # NOTE(review): y defaults to None, but y.sum(1) is called here.
        # _idx: rows whose label sum is 1; idx_: rows whose label sum is 0.
        _idx = np.where(y.sum(1)==1)[0]
        idx_ = np.where(y.sum(1)==0)[0]
        if len(_idx) > 0:
            self.Cy = self.cds(y)
            self.Cqyx = self.cds(self.qyx)
            self.Creal = -rm.sum(rm.log( self.Cy[_idx] + eps ))/len(_idx)
            # Only the else branch runs; the ``if 0`` variant is disabled.
            if 0:
                self.Cfake = -rm.sum(rm.log( 1 - self.Cqyx[_idx] + eps ))/len(_idx)
            else:
                self.Cfake = -rm.sum(rm.log( 1 - self.Cqyx + eps ))/nb
            self.Cfake2 = -rm.sum(rm.log( self.Cqyx[_idx] + eps ))/len(_idx)
        else:
            self.Cfake = rm.Variable(0)
            self.Creal = rm.Variable(0)
            self.Cfake2 = rm.Variable(0)

    # --- summarizing loss ---
    self.gan_loss = self.real + self.fake
    if self.mode=='clustering':
        if len(_idx) > 0:
            # Reconstruction error over the rows selected by idx_ only.
            self.reconE = rm.mean_squared_error( self.recon[idx_], x[idx_])
        else:
            self.reconE = rm.mean_squared_error(self.recon, x)
    else:
        self.reconE = rm.mean_squared_error(self.recon, x)
    self.real_count = (self.Dpz >= 0.5).sum()/nb
    self.fake_count = (self.Dqzx < 0.5).sum()/nb
    self.enc_loss = self.fake2pos
    if self.mode=='clustering' or self.mode=='reduction':
        if len(_idx) > 0:
            self.Creal_count = (self.Cy[_idx] >= 0.5).sum()/len(_idx)
            self.Cfake_count = (self.Cqyx[_idx] < 0.5).sum()/len(_idx)
        else:
            self.Creal_count = 0
            self.Cfake_count = 0
        self.CganE = self.Creal + self.Cfake
        self.CgenE = self.Cfake2

    return self.recon