def check_forward(self, x1_data, x2_data, x3_data):
    """Run the link on three inputs in sequence and compare with ``functions.lstm``.

    Each input is wrapped in a ``chainer.Variable`` when
    ``self.input_variable`` is set, otherwise the raw array is fed.

    Args:
        x1_data: Array for the first step (fresh state).
        x2_data: Array for the second step; ``len(x2_data)`` rows of the
            previous hidden state are fed back through ``link.lateral``.
        x3_data: Array for the third step.
    """
    xp = self.link.xp

    # Step 1: the expected result starts from an all-zero cell state.
    x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
    h1 = self.link(x1)
    with cuda.get_device_from_array(x1_data):
        c0 = chainer.Variable(
            xp.zeros((len(self.x1), self.out_size), dtype=self.x1.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(self.link.h.data, h1_expect.data)
    testing.assert_allclose(self.link.c.data, c1_expect.data)

    # Step 2: only the first ``batch`` rows of the state are updated.
    batch = len(x2_data)
    x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
    h1_in, h1_rest = functions.split_axis(
        self.link.h.data, [batch], axis=0)
    y2 = self.link(x2)
    # Select the device from the raw array, as in step 1: ``x1`` may be a
    # Variable wrapper rather than an array.
    with cuda.get_device_from_array(x1_data):
        c2_expect, y2_expect = functions.lstm(
            c1_expect, self.link.upward(x2) + self.link.lateral(h1_in))
    testing.assert_allclose(y2.data, y2_expect.data)
    testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
    testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

    # Step 3: the hidden state captured before the call must equal the one
    # held by the link afterwards.
    x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
    h2_rest = self.link.h
    y3 = self.link(x3)
    c3_expect, y3_expect = functions.lstm(c2_expect, self.link.upward(x3))
    testing.assert_allclose(y3.data, y3_expect.data)
    testing.assert_allclose(self.link.h.data, h2_rest.data)
def addgrad(self, var):
    """Accumulate the gradient of ``var`` into this variable's gradient.

    The source gradient is copied to the device holding ``self.data`` when
    the two device ids differ. If this variable has no data array yet, it is
    initialized with the shape of ``var`` first.

    Args:
        var (Variable): Source variable.

    """
    incoming = var._grad_var
    if incoming is None:
        # The source carries no gradient; nothing to accumulate.
        return

    if self.data is None:
        self.initialize(var.shape)
    current = self._grad_var

    src_id = cuda.get_device_from_array(incoming.data).id
    dst_id = cuda.get_device_from_array(self.data).id
    if src_id != dst_id:
        incoming = chainer.functions.copy(incoming, dst_id)

    if current is None:
        self._grad_var = incoming
    else:
        self._grad_var = incoming + current
def check_forward(self, x1_data, x2_data, x3_data):
    """Run the link on three inputs in sequence and compare with ``functions.lstm``.

    Each input is wrapped in a ``chainer.Variable`` when
    ``self.input_variable`` is set, otherwise the raw array is fed.

    Args:
        x1_data: Array for the first step (fresh state).
        x2_data: Array for the second step; ``len(x2_data)`` rows of the
            previous hidden state are fed back through ``link.lateral``.
        x3_data: Array for the third step.
    """
    xp = self.link.xp

    # Step 1: the expected result starts from an all-zero cell state.
    x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
    h1 = self.link(x1)
    with cuda.get_device_from_array(x1_data):
        c0 = chainer.Variable(
            xp.zeros((len(self.x1), self.out_size), dtype=self.x1.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(self.link.h.data, h1_expect.data)
    testing.assert_allclose(self.link.c.data, c1_expect.data)

    # Step 2: only the first ``batch`` rows of the state are updated.
    batch = len(x2_data)
    x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
    h1_in, h1_rest = functions.split_axis(self.link.h.data, [batch], axis=0)
    y2 = self.link(x2)
    # Select the device from the raw array, as in step 1: ``x1`` may be a
    # Variable wrapper rather than an array.
    with cuda.get_device_from_array(x1_data):
        c2_expect, y2_expect = functions.lstm(
            c1_expect, self.link.upward(x2) + self.link.lateral(h1_in))
    testing.assert_allclose(y2.data, y2_expect.data)
    testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
    testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

    # Step 3: the hidden state captured before the call must equal the one
    # held by the link afterwards.
    x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
    h2_rest = self.link.h
    y3 = self.link(x3)
    c3_expect, y3_expect = functions.lstm(c2_expect, self.link.upward(x3))
    testing.assert_allclose(y3.data, y3_expect.data)
    testing.assert_allclose(self.link.h.data, h2_rest.data)
def test_to_gpu_from_another_gpu(self):
    """``to_gpu(1)`` moves data and grad to device 1 and keeps their values."""
    cp = cuda.cupy
    a = chainer.Variable(cp.zeros(3, dtype=np.float32))
    a.grad = cp.ones_like(a.data)

    # Snapshots taken before the transfer.
    expected_data = a.data.copy()
    expected_grad = a.grad.copy()

    a.to_gpu(1)

    self.assertEqual(int(cuda.get_device_from_array(a.data)), 1)
    self.assertEqual(int(cuda.get_device_from_array(a.grad)), 1)
    cp.testing.assert_array_equal(a.data, expected_data)
    cp.testing.assert_array_equal(a.grad, expected_grad)
def _sum_sqnorm(arr):
    """Return the sum of squared entries over all arrays in ``arr``.

    Partial sums are kept per device id. When exactly one device id was
    seen, the partial sums are added under the device of ``arr[0]`` without
    conversion; otherwise each partial sum is converted with ``float`` and
    the total is a Python number.
    """
    per_device = collections.defaultdict(float)
    for array in arr:
        with cuda.get_device_from_array(array) as dev:
            flat = array.ravel()
            squared = flat.dot(flat)
            per_device[int(dev)] += squared

    if len(per_device) != 1:
        return sum([float(v) for v in six.itervalues(per_device)])

    # If only a single device is used, aggregate square norms on it.
    with cuda.get_device_from_array(arr[0]):
        return sum(six.itervalues(per_device))
def check_addgrad(self, src, dst, expect,
                  clear_src_grad=False, clear_dst_grad=False):
    """Check ``Variable.addgrad`` from a source array into a destination array.

    Both variables use their own data array as gradient; either gradient
    can be cleared before accumulation. After ``addgrad`` the destination
    gradient must equal ``expect`` and sit on the same device as its data.
    """
    xp = cuda.get_array_module(dst)

    source = chainer.Variable(src)
    source.grad = src
    target = chainer.Variable(dst)
    target.grad = dst

    if clear_src_grad:
        source.cleargrad()
    if clear_dst_grad:
        target.cleargrad()

    target.addgrad(source)

    xp.testing.assert_array_equal(target.grad, expect)
    data_device = cuda.get_device_from_array(target.data)
    grad_device = cuda.get_device_from_array(target.grad)
    self.assertEqual(data_device, grad_device)
def prepare(self):
    """Prepares for an update.

    Initializes the state of parameters that have none yet (e.g. parameters
    added after the set up). For parameters with an existing state, arrays
    in the state dictionary are moved to CPU or GPU to follow the parameter
    array.

    """
    states = self._states
    for name, param in self.target.namedparams():
        if name not in states:
            fresh = {}
            self.init_state(param, fresh)
            states[name] = fresh
            continue

        state = states[name]
        with cuda.get_device_from_array(param.data) as dev:
            if int(dev) == -1:
                # Parameter lives on the CPU: fetch GPU arrays to the host.
                for key, value in six.iteritems(state):
                    if isinstance(value, cuda.ndarray):
                        state[key] = value.get()
                continue

            # Parameter lives on a GPU.
            cupy = cuda.cupy
            for key, value in six.iteritems(state):
                if isinstance(value, numpy.ndarray):
                    state[key] = cuda.to_gpu(value)
                elif isinstance(value, cupy.ndarray) and value.device != dev:
                    state[key] = cupy.copy(value)
def accumulate_grads(self, grads):
    """Accumulates gradients from other source.

    Adds each given gradient array in place to the gradient of the
    corresponding parameter of the target link, moving the source to the
    CPU or to the destination GPU as needed.

    Args:
        grads (Iterable): Iterable of gradient arrays to be accumulated.

    .. deprecated:: v1.5
       Use the :meth:`chainer.Link.addgrads` method of the target link
       instead.

    """
    for param, incoming in zip(self.target.params(), grads):
        own = param.grad

        if isinstance(own, numpy.ndarray):
            # Destination is on the host.
            own += cuda.to_cpu(incoming)
            continue

        with cuda.get_device_from_array(own):
            cross_device = (isinstance(incoming, cuda.ndarray)
                            and own.device != incoming.device)
            if cross_device:
                own += cuda.copy(incoming, out_device=own.device)
            else:
                own += cuda.to_gpu(incoming)
def __call__(self, opt):
    """Add ``rate * sign(p)`` to the gradient of every targeted parameter.

    A parameter is targeted when its name contains ``'gamma'`` and, if
    ``self.target_layers`` is not ``None``, also contains at least one of
    the listed names. A warning is logged when nothing was targeted.

    Args:
        opt: Optimizer whose ``target`` link provides the parameters.
    """
    count = 0
    for name, param in opt.target.namedparams():
        p, g = param.data, param.grad
        if p is None or g is None:
            # Skip only this parameter. Returning here would silently drop
            # the regularization of every parameter that follows.
            continue

        # Parameters whose name contains 'gamma' (BatchNormalization) are
        # the pruning targets. When target names are given, only the
        # parameters whose name contains one of them are used; when
        # target_layers is None, all of them are used.
        if 'gamma' not in name:
            continue
        if self.target_layers is not None and not any(
                target in name for target in self.target_layers):
            continue

        # pruning targets
        count += 1
        logger.debug('Lasso: {}'.format(name))
        # TODO(tkato) reporter

        xp = backend.get_array_module(p)
        with cuda.get_device_from_array(p) as dev:
            sign = xp.sign(p)
            if int(dev) == -1:
                g += self.rate * sign
            else:
                kernel = cuda.elementwise(
                    'T s, T decay', 'T g', 'g += decay * s', 'lasso')
                kernel(sign, self.rate, g)

    if count == 0:
        logger.warning('Lasso is not apply')
def get_xp(self, array):
    """Return ``np`` for arrays on the dummy device (id -1), else ``cuda.cupy``.

    A ``Variable`` argument is unwrapped to its ``data`` first.
    """
    target = array.data if isinstance(array, Variable) else array
    with cuda.get_device_from_array(target) as dev:
        if int(dev) == -1:
            xp = np
        else:
            xp = cuda.cupy
    return xp
def _prepare(self, param):
    """Create the state if missing and move its arrays to the parameter's device.

    Only values that are ``numpy.ndarray`` or ``cuda.ndarray`` are moved;
    other state entries are left untouched.
    """
    with cuda.get_device_from_array(param.data) as device:
        state = self.state
        if state is None:
            state = self._state = {}
            self.init_state(param)

        array_types = (numpy.ndarray, cuda.ndarray)
        for name, value in six.iteritems(state):
            if not isinstance(value, array_types):
                continue
            if cuda.get_device_from_array(value).id == device.id:
                # Already on the right device.
                continue
            if device.id >= 0:
                state[name] = cuda.to_gpu(value)
            else:
                state[name] = cuda.to_cpu(value)
def _child_sum_tree_lstm(func, *inputs):
    """Array-level reference computation for a child-sum tree LSTM.

    ``inputs`` is laid out as ``(c_1..c_k, h_1..h_k, x)``: the first half
    are cell states, the rest except the last are hidden states, and the
    last element is the input. Weights are read from ``func.W_x``,
    ``func.W_h_aio`` and ``func.W_h_f``.

    Returns:
        tuple: ``(c_next, y)``.
    """
    half = len(inputs) // 2
    cs = inputs[:half]
    hs = inputs[half:-1]
    x = inputs[-1]
    xp = cuda.get_array_module(x)

    with cuda.get_device_from_array(x):
        W_x = func.W_x.W.data.T
        b_x = func.W_x.b.data
        W_h_aio = func.W_h_aio.W.data.T
        W_hf = func.W_h_f.W.data.T

        # Column-wise split into the a / i / o / f gate blocks.
        W_xa, W_xi, W_xo, W_xf = xp.split(W_x, 4, 1)
        b_a, b_i, b_o, b_f = xp.split(b_x[None, ], 4, 1)
        W_ha, W_hi, W_ho = xp.split(W_h_aio, 3, 1)

        sum_h = sum(hs)
        a = xp.tanh(x.dot(W_xa) + sum_h.dot(W_ha) + b_a)
        i = _sigmoid(x.dot(W_xi) + sum_h.dot(W_hi) + b_i)
        o = _sigmoid(x.dot(W_xo) + sum_h.dot(W_ho) + b_o)
        # One forget gate per child, each using that child's hidden state.
        f_list = [_sigmoid(x.dot(W_xf) + h.dot(W_hf) + b_f) for h in hs]

        c_next = a * i + sum(f * c for f, c in zip(f_list, cs))
        y = o * xp.tanh(c_next)
    return c_next, y
def predict(self, imgs, bbox):
    """Predict labels for a batch of images and bounding boxes.

    The model is called with ``layers=["fc"]`` under no-backprop mode on
    the device of ``imgs``; the first returned value is used as the scores.

    Args:
        imgs: Batch of images. Presumably shaped (B, C, H, W) in CHW/RGB
            with values in [0, 255], as the earlier documentation stated
            -- TODO confirm.
        bbox: Bounding boxes for the batch. Presumably shaped
            (B, R, 4) -- TODO confirm.

    Returns:
        tuple: ``(pred_label, scores)`` where ``scores`` is the data array
        of the score variable and ``pred_label`` is the result of
        ``self.fetch_labels_from_scores(xp, scores)``.
    """
    # A whole batch is passed in.
    xp = cuda.get_array_module(imgs)
    with cuda.get_device_from_array(imgs), \
            chainer.function.no_backprop_mode():
        outputs = self.__call__(imgs, bbox, layers=["fc"])
        roi_scores, _, _ = outputs  # shape = R', class_num
        scores = roi_scores.data
        pred_label = self.fetch_labels_from_scores(xp, scores)
    return pred_label, roi_scores.data
def forward(self, inputs):
    """Copy the single input to ``self.out_device`` and record its source device id.

    Returns:
        tuple: One-element tuple with the array on the host when
        ``out_device`` is -1, otherwise on that GPU.
    """
    x, = inputs
    self._in_device = cuda.get_device_from_array(x).id
    to_host = int(self.out_device) == -1
    if to_host:
        moved = cuda.to_cpu(x)
    else:
        moved = cuda.to_gpu(x, self.out_device)
    return moved,
def calc_accuracy(self, x, t):
    """Compute word-level accuracy and feed it to ``scale_area_loss_factor``.

    Args:
        x: Triple whose first element is an iterable of predictions; the
            other two elements are ignored.
        t: Ground-truth labels, passed to ``self.strip_prediction``.

    Returns:
        The fraction of decoded words that equal their decoded label.
    """
    per_step_predictions, _, _ = x
    stacked = F.concat(
        [F.expand_dims(prediction, axis=0)
         for prediction in per_step_predictions],
        axis=0)

    self.xp = cuda.get_array_module(stacked[0], t)
    accuracies = []

    def decode(sequence):
        # Map every label id to its character and join into one string.
        return "".join(map(self.label_to_char, sequence))

    with cuda.get_device_from_array(stacked.data):
        probabilities = F.softmax(stacked, axis=2).data
        best = self.xp.argmax(probabilities, axis=2)
        best = self.xp.transpose(best, (1, 0))

        words = self.strip_prediction(best)
        labels = self.strip_prediction(t)

        num_correct_words = 0
        for word, label in zip(words, labels):
            if decode(word) == decode(label):
                num_correct_words += 1

        accuracies.append(num_correct_words / len(labels))

    overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
    self.scale_area_loss_factor(overall_accuracy)
    return overall_accuracy
def __call__(self, atom_array, adj, wle_array=None, is_real_node=None):
    """Forward propagation through the update and readout layers.

    Args:
        atom_array: Node input. Embedded with ``self.embed`` when its dtype
            is int32, otherwise used as the node feature directly.
        adj: Adjacency input, rescaled with ``rescale_adj`` when
            ``self.scale_adj`` is set.
        wle_array: Input of ``self.embed_wle``; only read when
            ``self.with_wle`` is set.
        is_real_node: Forwarded to the readout layers and to
            ``preprocess_addtional_kwargs``.

    Returns:
        The per-step readouts concatenated on axis 1 (``concat_hidden``),
        their sum (``sum_hidden``), or the output of the first readout
        layer applied to the final hidden state.
    """
    self.reset_state()

    if atom_array.dtype == self.xp.int32:
        h = self.embed(atom_array)
    else:
        # TODO: GraphLinear or GraphMLP can be used.
        h = atom_array

    # Keep a copy of the initial features for the readout layers.
    h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

    # all Combined NLE processes are done here.
    if self.with_wle:
        h_s = self.embed_wle(wle_array)

        # gated sum
        gate_input = self.gate_W1(h) + self.gate_W2(h_s)
        gate_coefff = functions.sigmoid(gate_input)
        h = (1.0 - gate_coefff) * h + gate_coefff * h_s

    additional_kwargs = self.preprocess_addtional_kwargs(
        atom_array, adj, wle_array=wle_array, is_real_node=is_real_node)

    if self.scale_adj:
        adj = rescale_adj(adj)

    g_list = []
    for step in range(self.n_update_layers):
        # With weight tying every step reuses layer 0.
        update_layer_index = 0 if self.weight_tying else step
        h = self.update_layers[update_layer_index](h=h, adj=adj,
                                                   **additional_kwargs)

        if self.use_batchnorm:
            h = self.bnorms[update_layer_index](h)

        if self.dropout_ratio > 0.:
            h = functions.dropout(h, ratio=self.dropout_ratio)

        # The activation is applied only on the first n_activation steps.
        if self.activation is not None and step < self.n_activation:
            h = self.activation(h)

        if self.concat_hidden or self.sum_hidden:
            g = self.readout_layers[step](h=h, h0=h0,
                                          is_real_node=is_real_node,
                                          **additional_kwargs)
            g_list.append(g)

    if self.concat_hidden:
        return functions.concat(g_list, axis=1)
    else:
        if self.sum_hidden:
            g = functions.sum(functions.stack(g_list), axis=0)
        else:
            # NOTE(review): unlike the per-step readout above, this call
            # does not forward additional_kwargs -- confirm it is intended.
            g = self.readout_layers[0](h=h, h0=h0,
                                       is_real_node=is_real_node)
        return g
def localization_net(self, images):
    """Predict ``self.num_timesteps`` batches of 2x3 transformation matrices.

    The images pass through the convolutional feature extractor; the pooled
    feature map is then fed to the LSTM once per timestep and every LSTM
    output is mapped to a ``(-1, 2, 3)`` tensor.

    Returns:
        All per-timestep results concatenated along axis 0.
    """
    self.lstm.reset_state()
    self.transform_2.reset_state()

    features = self.data_bn(images)
    features = F.relu(self.bn0(self.conv0(features)))
    features = F.max_pooling_2d(features, 3, stride=2, pad=1)

    features = self.rs1_1(features)
    features = self.rs1_2(features)
    features = self.rs2_1(features)
    features = self.rs2_2(features)
    features = self.rs3_1(features)
    features = self.rs3_2(features)

    # Kept on the instance before pooling.
    self.localization_vis_anchor = features

    features = F.average_pooling_2d(features, 5, stride=1)

    localizations = []
    with cuda.get_device_from_array(features.data):
        for _ in range(self.num_timesteps):
            # The same feature map is fed at every step; only the recurrent
            # state changes between steps.
            hidden = F.relu(self.lstm(features))
            matrix = F.reshape(self.transform_2(hidden), (-1, 2, 3))
            localizations.append(
                rotation_dropout(matrix, ratio=self.dropout_ratio))

    return F.concat(localizations, axis=0)
def calc_accuracy(self, predictions, labels): batch_predictions = predictions # concat all individual predictions and slice for each time step batch_predictions = F.concat([F.expand_dims(p, axis=2) for p in batch_predictions], axis=2) t = F.reshape(labels, (1, self.args.timesteps, -1)) accuracies = [] with cuda.get_device_from_array(batch_predictions.data): for prediction, label in zip(F.separate(batch_predictions, axis=0), F.separate(t, axis=2)): classification = F.softmax(prediction, axis=2) classification = classification.data classification = self.xp.argmax(classification, axis=2) # classification = self.xp.transpose(classification, (1, 0)) words = self.strip_prediction(classification) labels = self.strip_prediction(label.data) for word, label in zip(words, labels): word = "".join(map(self.label_to_char, word)) label = "".join(map(self.label_to_char, label)) if word == label: self.num_correct_words += 1 self.num_words += 1 return word, label
def get_gaussian_noise(X, std):
    """Return a noisy copy of ``X``, clipped to [0, 1], placed on a GPU.

    Each value is sampled on the host from a normal distribution centred on
    the corresponding entry of ``X`` with scale ``std``, cast to float32.
    """
    # NOTE(review): the device is looked up from ``X.data`` -- confirm which
    # type of ``X`` callers pass.
    target_device = cuda.get_device_from_array(X.data)
    host_values = cuda.to_cpu(X)
    noisy = np.random.normal(host_values, scale=std).astype('f')
    noisy = np.clip(noisy, 0., 1.)
    return cuda.to_gpu(noisy, device=target_device)
def get_impulse_noise(X, level):
    """Return ``X`` with entries zeroed at random, placed on a GPU.

    Each entry is multiplied by an independent Bernoulli draw that is 1
    with probability ``1 - level``.
    """
    keep_probability = 1. - level
    # NOTE(review): the device is looked up from ``X.data`` -- confirm which
    # type of ``X`` callers pass.
    target_device = cuda.get_device_from_array(X.data)
    host_values = cuda.to_cpu(X)
    mask = np.random.binomial(
        1, keep_probability, size=host_values.shape).astype('f')
    return cuda.to_gpu(host_values * mask, device=target_device)
def __call__(self, images):
    """Predict ``self.num_timesteps`` batches of 2x3 transformation matrices.

    Returns:
        All per-timestep ``(-1, 2, 3)`` results concatenated along axis 0.
    """
    self.lstm.reset_state()

    features = F.relu(self.bn0(self.conv0(images)))
    features = F.average_pooling_2d(features, 2, stride=2)

    features = self.rs1(features)
    features = F.max_pooling_2d(features, 2, stride=2)
    features = self.rs2(features)
    features = F.max_pooling_2d(features, 2, stride=2)
    features = self.rs3(features)

    # Kept on the instance before pooling.
    self.vis_anchor = features

    features = F.average_pooling_2d(features, 5, stride=2)

    # Predict N two-dimensional affine transformation matrices A.
    localizations = []
    with cuda.get_device_from_array(features.data):
        for _ in range(self.num_timesteps):
            hidden = F.relu(self.lstm(features))
            matrix = F.reshape(self.transform_2(hidden), (-1, 2, 3))
            # Rotation dropout, meant to prevent excessive rotation.
            localizations.append(
                rotation_dropout(matrix, ratio=self.dropout_ratio))

    return F.concat(localizations, axis=0)
def calc_accuracy(self, x, t):
    """Average the per-timestep word accuracy and pass it to ``scale_area_loss_factor``.

    Args:
        x: Triple whose first element holds one prediction (or list of
            predictions) per timestep; the other two elements are ignored.
        t: Labels, reshaped to ``(batch_size, self.num_timesteps, -1)``.

    Returns:
        The mean of the per-timestep accuracies (0 when there are none).
    """
    batch_predictions, _, _ = x
    self.xp = cuda.get_array_module(batch_predictions[0], t)

    batch_size = t.shape[0]
    t = F.reshape(t, (batch_size, self.num_timesteps, -1))

    def decode(sequence):
        # Map every label id to its character and join into one string.
        return "".join(map(self.label_to_char, sequence))

    accuracies = []
    for predictions, step_labels in zip(batch_predictions,
                                        F.separate(t, axis=1)):
        if isinstance(predictions, list):
            predictions = F.concat(
                [F.expand_dims(p, axis=0) for p in predictions], axis=0)

        with cuda.get_device_from_array(predictions.data):
            probabilities = F.softmax(predictions, axis=2).data
            best = self.xp.argmax(probabilities, axis=2)
            best = self.xp.transpose(best, (1, 0))

            words = self.strip_prediction(best)
            labels = self.strip_prediction(step_labels.data)

            num_correct_words = 0
            for word, label in zip(words, labels):
                if decode(word) == decode(label):
                    num_correct_words += 1

            accuracies.append(num_correct_words / len(labels))

    overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
    self.scale_area_loss_factor(overall_accuracy)
    return overall_accuracy
def __call__(self, rule, param):
    """Clip the parameter's gradient in place to ``[lower_bound, upper_bound]``.

    Parameters without a gradient are left untouched.
    """
    grad = param.grad
    if grad is not None:
        xp = cuda.get_array_module(grad)
        with cuda.get_device_from_array(grad):
            xp.clip(grad, self.lower_bound, self.upper_bound, out=grad)
def zerograd(self):
    """Initializes the gradient array by zeros.

    An existing gradient variable is unchained and filled with zeros in
    place; a missing one is created as a zero array shaped like the data.
    Nothing happens (apart from the warning) when the data is ``None``.

    .. deprecated:: v1.15
       Use :meth:`cleargrad` instead.

    """
    warnings.warn(
        'Variable.zerograd is deprecated. Use Variable.cleargrad instead.',
        DeprecationWarning)

    data = self.data
    if data is None:
        return

    with cuda.get_device_from_array(data) as dev:
        grad_var = self._grad_var
        if grad_var is not None:
            grad_var.unchain()
            grad_var.data.fill(0)
        else:
            xp = numpy if dev.id == -1 else cuda.cupy
            self.grad = xp.zeros_like(self.data)
def __call__(self, atom_array, adj):
    """Forward propagation

    Args:
        atom_array (numpy.ndarray): minibatch of molecules represented by
            atom IDs; ``atom_array[mol_index, atom_index]`` is the atomic
            number of that atom. Arrays whose dtype is not int32 are used
            as node features without embedding.
        adj (numpy.ndarray): minibatch of adjacency matrices with edge-type
            information

    Returns:
        ~chainer.Variable: minibatch of fingerprint

    """
    self.update_layer.reset_state()

    dtype = atom_array.dtype
    if dtype == numpy.int32 or dtype == cuda.cupy.int32:
        h = self.embed(atom_array)  # (minibatch, max_num_atoms)
    else:
        h = atom_array
    # Copy of the initial features for the readout.
    h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

    readouts = []
    for step in range(self.n_layers):
        h = self.update(h, adj, step)
        if self.concat_hidden:
            readouts.append(self.readout(h, h0, step))

    if not self.concat_hidden:
        return self.readout(h, h0, 0)
    return functions.concat(readouts, axis=2)
def __call__(self, sparse_batch, is_real_node=None):
    """Forward propagation on a sparse graph batch.

    Args:
        sparse_batch: Batch object; its ``x``, ``edge_index`` and ``batch``
            attributes are read.
        is_real_node: Forwarded to the readout layers.

    Returns:
        The node features when ``self.node_embedding`` is set, otherwise
        the graph-level readout (per-step readouts concatenated on axis 1
        when ``self.concat_hidden`` is set).
    """
    node_input = sparse_batch.x
    if node_input.dtype == self.xp.int32:
        h = self.embed(node_input)  # (minibatch, max_num_atoms)
    else:
        h = self.first_mlp(node_input)

    # Copy of the initial features for the readout.
    h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

    last_step = self.n_message_layers - 1
    readouts = []
    for step in range(self.n_message_layers):
        # With weight tying every step reuses layer 0.
        layer_index = 0 if self.weight_tying else step
        h = self.update_layers[layer_index](h, sparse_batch.edge_index)
        if step != last_step:
            h = functions.relu(h)
        if self.concat_hidden:
            # NOTE(review): this call omits ``sparse_batch.batch`` while the
            # final readout below passes it -- confirm the readout signature.
            readouts.append(self.readout_layers[step](h, h0, is_real_node))

    if self.node_embedding:
        return h

    if self.concat_hidden:
        return functions.concat(readouts, axis=1)
    return self.readout_layers[0](h, sparse_batch.batch, h0, is_real_node)
def I_up_loss(self, z, lossfun=None, converter=concat_examples):
    """Calculate I_up_loss(z, z_test)

    Note that `calc_s_test` must be executed beforehand, z_test is used in
    this method.

    Args:
        z: Sample(s) whose gradient is registered under
            ``STATE_GRAD_ORIGINAL``.
        lossfun: Loss function; ``self.target`` is used when ``None``.
        converter: Forwarded to ``_calc_and_register_grad``.

    Returns:
        Sum over all parameters of
        ``sum(-state[STATE_HINV_V] * state[STATE_GRAD_ORIGINAL])``.

    """
    # TODO: currently, z must be 1 minibatch size.
    # Change this to use `elementwise_grad` so that we can calculate
    # I_up_loss for each z much more efficiently.
    if lossfun is None:
        # use self.target.__call__ as loss function if not set.
        lossfun = self.target

    # [Note] Use `grad_original` state
    self._calc_and_register_grad(
        z, self.STATE_GRAD_ORIGINAL, lossfun, converter)

    influence_states = self._infl_states
    total = 0
    for name, param in self.target.namedparams():
        with cuda.get_device_from_array(param.data):
            entry = influence_states[name]
            xp = cuda.get_array_module(param.data)
            hinv_v = entry[self.STATE_HINV_V]
            grad_original = entry[self.STATE_GRAD_ORIGINAL]
            total += xp.sum(-hinv_v * grad_original)
    return total
def __call__(self, atom_array, adj):
    """Forward propagation with optional dropout and layer aggregation.

    Args:
        atom_array: Node input; embedded when its dtype is int32.
        adj: Adjacency input passed to ``self.update``.

    Returns:
        Per-step readouts concatenated on axis 1 (``concat_hidden``), the
        readout of the aggregated hidden states (``layer_aggr``), or the
        readout of the final hidden state.
    """
    # reset state
    # self.update_layer.reset_state()
    # [layer.reset_state() for layer in self.update_layer]
    if atom_array.dtype == self.xp.int32:
        h = self.embed(atom_array)  # (minibatch, max_num_atoms)
    else:
        h = atom_array
    # Copy of the initial features for the readout.
    h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

    readouts = []
    hidden_states = []
    for step in range(self.n_layers):
        h = self.update(h, adj, step)
        if self.dropout_rate != 0.0:
            h = functions.dropout(h, ratio=self.dropout_rate)
        if self.concat_hidden:
            readouts.append(self.readout(h, h0, step))
        if self.layer_aggr:
            hidden_states.append(h)

    if self.concat_hidden:
        return functions.concat(readouts, axis=1)
    if self.layer_aggr:
        aggregated = self.aggr(hidden_states)
        return self.readout(aggregated, h0, 0)
    return self.readout(h, h0, 0)
def __call__(self, atom_array, adj, is_real_node=None):
    """Forward propagation

    Args:
        atom_array (numpy.ndarray): minibatch of molecules represented by
            atom IDs; ``atom_array[mol_index, atom_index]`` is the atomic
            number of that atom. Arrays whose dtype is not int32 are used
            as node features without embedding.
        adj (numpy.ndarray): minibatch of adjacency matrices with edge-type
            information
        is_real_node (numpy.ndarray): 2-dim array (minibatch, num_nodes).
            1 for real node, 0 for virtual node.
            If `None`, all nodes are considered real.

    Returns:
        ~chainer.Variable: minibatch of fingerprint
    """
    self.reset_state()

    if atom_array.dtype == self.xp.int32:
        h = self.embed(atom_array)  # (minibatch, max_num_atoms)
    else:
        h = atom_array
    # Copy of the initial features for the readout.
    h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

    readouts = []
    for step in range(self.n_layers):
        # With weight tying every step reuses layer 0.
        layer_index = 0 if self.weight_tying else step
        h = self.update_layers[layer_index](h, adj)
        if self.concat_hidden:
            readouts.append(self.readout_layers[step](h, h0, is_real_node))

    if not self.concat_hidden:
        return self.readout_layers[0](h, h0, is_real_node)
    return functions.concat(readouts, axis=1)
def _concat_arrays(arrays, padding):
    """Stack ``arrays`` along a new leading axis.

    When ``padding`` is not ``None`` the work is delegated to
    ``_concat_arrays_with_padding``.
    """
    if padding is not None:
        return _concat_arrays_with_padding(arrays, padding)

    first = arrays[0]
    xp = cuda.get_array_module(first)
    with cuda.get_device_from_array(first):
        # ``array[None]`` adds the batch axis to every element.
        expanded = [array[None] for array in arrays]
        return xp.concatenate(expanded)
def forward_gpu(self, x):
    """Copy ``x[0]`` to ``self.out_device`` and remember its source device.

    Returns:
        tuple: One-element tuple with a host array when ``out_device`` is
        -1, otherwise a copy on that device.
    """
    self.retain_inputs(())
    source = x[0]
    self._in_device = cuda.get_device_from_array(source)
    if self.out_device != -1:
        return cuda.copy(source, out_device=self.out_device),
    return cuda.to_cpu(source),
def init_state(self, param):
    """Create the ``'v'`` state entry as zeros shaped like the parameter data.

    When ``ia.all_ready`` accepts the new array, it is replaced by its
    ``ia.array`` conversion with ``itype=ia.ideep4py.wgt_array``.
    """
    data = param.data
    xp = cuda.get_array_module(data)
    with cuda.get_device_from_array(data):
        self.state['v'] = xp.zeros_like(data)

    if ia.all_ready((self.state['v'], )):
        self.state['v'] = ia.array(
            self.state['v'], itype=ia.ideep4py.wgt_array)
def __call__(self, opt):
    """Scale all gradients so that their global L2 norm stays within ``threshold``.

    On the CPU the hook returns early when no clipping is needed. On a
    device the rate is clipped to at most 1 and always multiplied in.
    """
    grads = [p.grad for p in opt.target.params(False)]
    sqnorm = _sum_sqnorm(grads)

    with cuda.get_device_from_array(sqnorm) as dev:
        xp = backend.get_array_module(sqnorm)
        norm = xp.sqrt(sqnorm)
        rate = self.threshold / norm
        # When no clipping is needed, skip the clipping on CPU and
        # multiply 1.0 on the device otherwise.
        on_cpu = int(dev) == -1
        if on_cpu:
            if rate >= 1:
                return
        else:
            rate = rate.clip(None, 1)

    for param in opt.target.params(False):
        grad = param.grad
        with cuda.get_device_from_array(grad):
            grad *= rate
def __call__(self, atom_array, adj):
    """Forward propagation with a configurable readout function.

    For ``readout_func == 'ggnn'`` every readout layer is bound to a copy
    of the initial node features via its ``h0`` keyword.

    Returns:
        Per-step readouts concatenated on axis 1 when ``concat_hidden`` is
        set, otherwise the first readout layer applied to the final state.
    """
    self.reset_state()

    if atom_array.dtype == self.xp.int32:
        h = self.embed(atom_array)
    else:
        h = atom_array

    readout_layers = self.readout_layers
    if self.readout_func == 'ggnn':
        h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)
        readout_layers = [
            partial(layer, h0=h0) for layer in self.readout_layers
        ]

    readouts = []
    for step in range(self.n_layers):
        # With weight tying every step reuses layer 0.
        layer_index = 0 if self.weight_tying else step
        h = self.update_layers[layer_index](h, adj)
        if self.concat_hidden:
            readouts.append(readout_layers[step](h))

    if not self.concat_hidden:
        return readout_layers[0](h)
    return functions.concat(readouts, axis=1)
def __call__(self, images):
    """Predict ``self.num_timesteps`` batches of 2x3 transformation matrices.

    Both the LSTM and ``transform_2`` have their state reset first.

    Returns:
        All per-timestep ``(-1, 2, 3)`` results concatenated along axis 0.
    """
    self.lstm.reset_state()
    self.transform_2.reset_state()

    features = F.relu(self.bn0(self.conv0(images)))
    features = F.average_pooling_2d(features, 2, stride=2)

    features = self.rs1(features)
    features = F.max_pooling_2d(features, 2, stride=2)
    features = self.rs2(features)
    features = F.max_pooling_2d(features, 2, stride=2)
    features = self.rs3(features)

    # Kept on the instance before pooling.
    self.vis_anchor = features

    features = F.average_pooling_2d(features, 5, stride=2)

    localizations = []
    with cuda.get_device_from_array(features.data):
        for _ in range(self.num_timesteps):
            # The same feature map is fed at every step; only the recurrent
            # state changes between steps.
            hidden = F.relu(self.lstm(features))
            matrix = F.reshape(self.transform_2(hidden), (-1, 2, 3))
            localizations.append(
                rotation_dropout(matrix, ratio=self.dropout_ratio))

    return F.concat(localizations, axis=0)
def __call__(self, atom_array, adj):
    """Forward propagation that also records per-step node and graph features.

    After the call ``self.atoms_list`` holds the hidden state of every
    step and ``self.g_vec_list`` holds the readout of every step.

    Args:
        atom_array: Node input; embedded when its dtype is int32.
        adj: Adjacency input passed to ``self.update``.

    Returns:
        Per-step readouts concatenated on axis 1 when ``concat_hidden`` is
        set, otherwise the final hidden state summed over axis 1.
    """
    # reset state
    self.atoms_list = []
    self.g_vec_list = []
    self.update_layer.reset_state()
    if atom_array.dtype == self.xp.int32:
        h = self.embed(atom_array)  # (minibatch, max_num_atoms)
    else:
        h = atom_array
    # Copy of the initial features for the readout.
    h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)
    g_list = []
    # Only referenced by the commented-out att_readout call below.
    h_list = []
    for step in range(self.n_layers):
        h = self.update(h, adj, step)
        if self.dropout_rate != 0.0:
            h = functions.dropout(h, ratio=self.dropout_rate)
        if self.concat_hidden:
            g = self.readout(h, h0, step)
            g_list.append(g)
        h_list.append(h)
        self.atoms_list.append(h)
        g_vec = self.readout(h, h0, step)
        self.g_vec_list.append(g_vec)
    if self.concat_hidden:
        return functions.concat(g_list, axis=1)
    else:
        # NOTE(review): this readout result is overwritten two lines below.
        g = self.readout(h, h0, 0)
        # g = self.att_readout(h, h_list, 0)
        g = functions.sum(h, axis=1)
        return g
def _sum_sqnorm(arr):
    """Return the sum of squared entries over all arrays in ``arr`` as a number.

    Partial sums are accumulated per device id and converted with
    ``float`` before the final addition.
    """
    per_device = collections.defaultdict(float)
    for array in arr:
        with cuda.get_device_from_array(array) as dev:
            flat = array.ravel()
            squared = flat.dot(flat)
            per_device[int(dev)] += squared
    return sum([float(value) for value in six.itervalues(per_device)])
def __call__(self, opt):
    """Scale all gradients in place when their global L2 norm exceeds ``threshold``.

    Args:
        opt: Optimizer whose ``target`` link provides the parameters.
    """
    norm = numpy.sqrt(_sum_sqnorm([p.grad for p in opt.target.params()]))
    if norm == 0:
        # All gradients are zero: nothing to clip, and dividing by the
        # norm would only produce a NumPy divide-by-zero warning.
        return
    rate = self.threshold / norm
    if rate < 1:
        for param in opt.target.params():
            grad = param.grad
            with cuda.get_device_from_array(grad):
                grad *= rate
def __call__(self, rule, param):
    """Add ``rate * param.data`` to the parameter's gradient in place.

    On the CPU this is a plain array update; otherwise an elementwise
    kernel named ``'weight_decay'`` performs the same update.
    """
    weight, grad = param.data, param.grad
    with cuda.get_device_from_array(weight) as dev:
        if int(dev) != -1:
            kernel = cuda.elementwise(
                'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')
            kernel(weight, self.rate, grad)
        else:
            grad += self.rate * weight
def __call__(self, opt):
    """Add ``rate * data`` to the gradient of every parameter of ``opt.target``."""
    decay = self.rate
    for param in opt.target.params():
        weight, grad = param.data, param.grad
        with cuda.get_device_from_array(weight) as dev:
            on_cpu = int(dev) == -1
            if on_cpu:
                grad += decay * weight
            else:
                self.kernel()(weight, decay, grad)
def __call__(self, opt):
    """Add noise from ``self.noise_func`` to the gradient of every parameter."""
    for param in opt.target.params():
        grad = param.grad
        xp = cuda.get_array_module(grad)
        with cuda.get_device_from_array(grad) as dev:
            noise = self.noise_func(xp, grad.shape, grad.dtype, self, opt)
            if int(dev) != -1:
                self.kernel()(noise, grad)
            else:
                grad += noise
def add_normal_noise(h, sigma=0.03):
    """Add zero-mean Gaussian noise to ``h`` when a random draw from {0, 1} is 1.

    Args:
        h: Variable to perturb; its ``data`` array gives shape and device.
        sigma: Standard deviation of the noise.

    Returns:
        ``h`` plus noise, or ``h`` itself when the draw is 0.
    """
    if np.random.randint(0, 2):
        n = np.random.normal(0, sigma, h.data.shape).astype(np.float32)
        # Look the device up on the underlying array: passing the Variable
        # itself made GPU inputs take the CPU branch below.
        device = cuda.get_device_from_array(h.data)
        if device.id == -1:
            n_ = Variable(n)
        else:
            n_ = Variable(cuda.to_gpu(n, device.id))
        h = h + n_
    return h
def addgrad(self, var):
    """Accumulates the gradient array from given source variable.

    Adds the gradient of ``var`` to the gradient of this variable, moving
    the source array to the host or to the destination device when the two
    device ids differ. If this variable has no data array yet, it is
    initialized with the shape of ``var`` first.

    Args:
        var (Variable): Source variable.

    """
    incoming = var._node._grad
    if incoming is None:
        return

    if self.data is None:
        self.initialize(var.shape)
    current = self._node._grad

    src_dev = cuda.get_device_from_array(incoming)
    dst_dev = cuda.get_device_from_array(self.data)

    if src_dev.id == dst_dev.id:
        # Same device: copy or add in place.
        with dst_dev:
            if current is not None:
                current += incoming
            else:
                xp = cuda.get_array_module(incoming)
                self._node.grad = xp.copy(incoming)
        return

    # Different devices: move the source gradient first.
    if dst_dev.id < 0:
        moved = cuda.to_cpu(incoming)
    else:
        moved = cuda.to_gpu(incoming, device=dst_dev)

    if current is None:
        self._node.grad = moved
        return
    with dst_dev:
        current += moved
def __init__(self, model, target_extractor=None, output_extractor=None,
             device=None, logger=None):
    """Store the model, the extractors, the device id and a logger.

    Args:
        model: Model to wrap; its ``params()`` are inspected when
            ``device`` is not given.
        target_extractor: Stored as is.
        output_extractor: Stored as is.
        device: Device id to use. When ``None`` it is taken from the
            parameter arrays of ``model``.
        logger: Logger to use; defaults to ``getLogger(__name__)``.
    """
    self.model = model  # type: chainer.Chain
    if device is not None:
        self._device = device
    else:
        # Pass the parameter arrays, not the parameter objects, so that
        # the device lookup inspects actual arrays.
        arrays = [param.data for param in model.params()]
        self._device = cuda.get_device_from_array(*arrays).id
    self.target_extractor = target_extractor
    self.output_extractor = output_extractor
    self.logger = logger or getLogger(__name__)
def __call__(self, opt):
    """Scale all gradients in place when their global L2 norm exceeds ``threshold``.

    Nothing is done when the norm is zero.
    """
    grads = [p.grad for p in opt.target.params(False)]
    norm = np.sqrt(_sum_sqnorm(grads))
    if norm == 0:
        return

    rate = self.threshold / norm
    if not rate < 1:
        # Already within the threshold.
        return

    for param in opt.target.params(False):
        grad = param.grad
        with cuda.get_device_from_array(grad):
            grad *= rate
def __call__(self, feat_h, feat_w, gt_boxes, img_info):
    """Calc targets of classification labels and bbox regression.

    Args:
        feat_h (int): The height of feature map.
        feat_w (int): The width of feature map.
        gt_boxes (:class:`~chainer.Variable`): The ground truth bounding
            boxes and its class label array. The shape should be
            :math:`(1, n_gt_boxes, 5)` and the batchsize should be 1.
            Each 5-dimensional vector has :math:`(x1, y1, x2, y2, cls_id)`.
            The scale of these values is at the input image scale.
        img_info (:class:`~chainer.Variable`): The input image info. It
            contains :math:`(height, width)` and the batchsize should be 1.
            So the shape should be :math:`(1, 2)`.

    Returns:
        bbox_labels (:class:`~numpy.ndarray` or :class:`~cupy.ndarray`):
            Classification labels of all anchor boxes. It contains values
            from :math:`{-1, 0, 1}` and the numbers of negative (=0) and
            positive (=1) are the same.
        bbox_reg_targets (:class:`~numpy.ndarray` or :class:`~cupy.ndarray`):
            The regression targets of bounding box transformation
            parameters.
        inds_inside (:class:`~numpy.ndarray` or :class:`~cupy.ndarray`):
            Indices of all anchor boxes that inside of the input image out
            of all possible anchor boxes (`all_anchors`) that has
            :math:`K \\times A` anchor boxes. This should be used to select
            proposals to be compared with the above two targets.
        n_all_bbox (int): The number of all possible bbox. This value is
            always larger than `len(inds_inside)`.

    """
    if self.type_check_enable:
        self._check_data_type_forward(gt_boxes, img_info)

    # Currently it assumes that the batchsize is always 1
    gt_boxes = gt_boxes.data[0]
    img_info = img_info.data[0]

    with cuda.get_device_from_array(gt_boxes):
        # (feat_h x feat_w x n_anchors, 4)
        xp = cuda.get_array_module(gt_boxes)
        all_bbox = xp.asarray(self._generate_all_bbox(feat_h, feat_w))
        inds_inside, all_inside_bbox = keep_inside(all_bbox, img_info)
        argmax_overlaps_inds, bbox_labels = \
            self._create_bbox_labels(
                inds_inside, all_inside_bbox, gt_boxes)

        # Convert fixed anchors in (x, y, w, h) to (dx, dy, dw, dh)
        # Select, per inside anchor, the ground-truth box given by
        # argmax_overlaps_inds.
        gt_boxes = gt_boxes[argmax_overlaps_inds]
        bbox_reg_targets = bbox_transform(all_inside_bbox, gt_boxes)
        bbox_reg_targets = bbox_reg_targets.astype(xp.float32)

    return bbox_labels, bbox_reg_targets, inds_inside, len(all_bbox)
def extend_arrays_to_shape(arrays, out_shape, value=0):
    """Copy every array into the leading corner of a ``value``-filled slot.

    The result has shape ``out_shape`` and the dtype of ``arrays[0]``;
    ``result[i]`` holds ``arrays[i]`` in its leading corner, the rest
    keeps ``value``.
    """
    # Ref: `_concat_arrays_with_padding` method in chainer convert.py
    # https://github.com/chainer/chainer/blob/master/chainer/dataset/convert.py
    first = arrays[0]
    xp = cuda.get_array_module(first)
    with cuda.get_device_from_array(first):
        result = xp.full(out_shape, value, dtype=first.dtype)
        for index, src in enumerate(arrays):
            # Leading region of slot ``index`` with the shape of ``src``.
            region = (index,) + tuple(slice(dim) for dim in src.shape)
            result[region] = src
    return result
def __call__(self, rule, param):
    """Add ``rate * sign(param.data)`` to the parameter's gradient in place.

    On the CPU this is a plain array update; otherwise an elementwise
    kernel named ``'lasso'`` performs the same update.
    """
    weight, grad = param.data, param.grad
    xp = cuda.get_array_module(weight)
    with cuda.get_device_from_array(weight) as dev:
        sign = xp.sign(weight)
        if int(dev) != -1:
            kernel = cuda.elementwise(
                'T s, T decay', 'T g', 'g += decay * s', 'lasso')
            kernel(sign, self.rate, grad)
        else:
            grad += self.rate * sign
def __call__(self, rule, param):
    """Add noise from ``self.noise_func`` to the parameter's gradient in place.

    On the CPU this is a plain array update; otherwise an elementwise
    kernel named ``'gradient_noise'`` performs the same update.
    """
    grad = param.grad
    xp = cuda.get_array_module(grad)
    with cuda.get_device_from_array(grad) as dev:
        noise = self.noise_func(xp, grad.shape, grad.dtype, self, rule)
        if int(dev) != -1:
            kernel = cuda.elementwise(
                'T noise', 'T g', 'g += noise', 'gradient_noise')
            kernel(noise, grad)
        else:
            grad += noise
def __call__(self, opt):
    """Add ``rate * sign(data)`` to the gradient of every parameter of ``opt.target``.

    Args:
        opt: Optimizer whose ``target`` link provides the parameters.
    """
    rate = self.rate
    for param in opt.target.params():
        p, g = param.data, param.grad
        xp = cuda.get_array_module(p)
        with cuda.get_device_from_array(p) as dev:
            # Compute the sign inside the device context so that it runs
            # on the device holding the parameter.
            sign = xp.sign(p)
            if int(dev) == -1:
                g += rate * sign
            else:
                self.kernel()(sign, rate, g)
def _concat_arrays(arrays, padding):
    """Stack ``arrays`` along a new leading axis.

    A sequence whose first element is neither a NumPy nor a CUDA array is
    converted with ``numpy.asarray`` first. When ``padding`` is not
    ``None`` the work is delegated to ``_concat_arrays_with_padding``.
    """
    # Convert `arrays` to numpy.ndarray if `arrays` consists of the built-in
    # types such as int or float.
    head = arrays[0]
    is_array = (isinstance(head, numpy.ndarray)
                or isinstance(head, cuda.ndarray))
    if not is_array:
        arrays = numpy.asarray(arrays)

    if padding is not None:
        return _concat_arrays_with_padding(arrays, padding)

    first = arrays[0]
    xp = cuda.get_array_module(first)
    with cuda.get_device_from_array(first):
        # ``array[None]`` adds the batch axis to every element.
        expanded = [array[None] for array in arrays]
        return xp.concatenate(expanded)
def forward(self, inputs):
    """Run ``self.forward_func`` on host copies of the inputs.

    When the inputs are not NumPy arrays, the outputs are sent to the
    device the inputs came from.

    Args:
        inputs: Input arrays.

    Returns:
        tuple: The outputs of ``self.forward_func``.
    """
    gpu = cuda.get_array_module(*inputs) is not numpy
    # Look the device up before the inputs are replaced by host copies,
    # and unpack them: the lookup takes arrays, not a list of arrays.
    device = cuda.get_device_from_array(*inputs) if gpu else None
    inputs = [cuda.to_cpu(x) for x in inputs]

    outputs = self.forward_func(*inputs)

    if gpu:
        # TODO(unno): We can remove redundant gpu-cpu copy using
        # theano.sandbox.cuda.CudaNdarray.gpudata
        outputs = [cuda.to_gpu(x, device) for x in outputs]

    return tuple(outputs)