def regularize_gpu(self):
    """Bound the row norms of ``self.M`` and the values of ``self.C``.

    Each row of ``self.M`` whose L2 norm is greater than ``self._reg_M`` is
    rescaled so that its norm equals ``self._reg_M``; all other rows are
    kept as they are.  ``self.C`` is then limited element-wise from above by
    ``self._max_C`` and from below by ``self._min_C``.

    Both attributes are rebound to newly computed arrays; nothing is
    returned.
    """
    from chainer.cuda import cupy

    # Per-row Euclidean norm, kept as a column so it broadcasts against M.
    row_norms = cupy.sqrt(cupy.sum(self.M ** 2, axis=1))[:, cupy.newaxis]
    exceeds_limit = row_norms > self._reg_M

    # The rescaled candidate is computed for every row; ``where`` then keeps
    # it only for the rows that are actually over the limit.
    rescaled = self.M * (self._reg_M / row_norms)
    self.M = cupy.where(exceeds_limit, rescaled, self.M)

    upper_bounded = cupy.minimum(self.C, self._max_C)
    self.C = cupy.maximum(self._min_C, upper_bounded)
def backward_gpu(self, inputs, grads):
    """Compute the GPU backward pass.

    Two things happen here:

    * the gradients with respect to ``mean_x`` and ``cov_x`` are computed
      and returned;
    * the gradients for the rows selected by ``self._pos_indexes`` and
      ``self._neg_indexes`` are accumulated in place into ``self.gM`` and
      ``self.gC`` with ``atomicAdd``.

    An element contributes a non-zero gradient only where
    ``kl_pos - kl_neg < self._margin``; everywhere else the gradient is 0.

    Args:
        inputs: Tuple ``(mean_x, cov_x, t)``.  ``mean_x`` is indexed as a
            2-D array (``mean_x.shape[1]`` is used as the row stride of the
            raw arrays).  The third entry is not used here.
        grads: One-element tuple holding the upstream gradient of the loss.

    Returns:
        Tuple ``(ge_m, ge_c, None)``: the gradients for ``mean_x`` and
        ``cov_x``; the third input receives no gradient.

    Raises:
        ValueError: If ``self._covariance_type`` is neither
            ``CovarianceType.diagonal`` nor ``CovarianceType.spherical``.

    Side effects:
        ``self._kl_pos`` and ``self._kl_neg`` are deleted on success.
        NOTE(review): these, together with ``self._m_pos``, ``self._c_pos``,
        ``self._m_neg``, ``self._c_neg``, ``self._pos_indexes`` and
        ``self._neg_indexes``, are presumably cached by the forward pass --
        confirm against the caller.
    """
    from chainer.cuda import cupy

    mean_x, cov_x, _ = inputs
    gloss, = grads
    n_in = mean_x.shape[1]
    margin = self._margin

    if self._covariance_type == CovarianceType.diagonal:
        # Column views of the KL terms so they broadcast against the
        # per-element arrays.  Locals are used because the attributes are
        # deleted at the end of this method anyway.
        kl_pos = self._kl_pos[cupy.newaxis, :].T
        kl_neg = self._kl_neg[cupy.newaxis, :].T

        # ``cp`` shares the placeholder ``T`` with the other operands.  The
        # original spelled it ``t``, which CuPy treats as a second,
        # unrelated type placeholder.
        (delta_pos, delta_neg,
         dEdi_pos, dEdi_neg,
         dEdj_pos, dEdj_neg) = cuda.elementwise(
            'T mx, T cx, T mp, T cp, T mn, T cn',
            'T dp, T dn, T ei_p, T ei_n, T ej_p, T ej_n',
            '''
            T cc = 1.0 / cx;
            T cc2 = cc * cc;
            dp = cc * (mx - mp);
            dn = cc * (mx - mn);
            ei_p = 0.5 * (cp * cc2 + (mx - mp) * (mx - mp) * cc2 - cc);
            ei_n = 0.5 * (cn * cc2 + (mx - mn) * (mx - mn) * cc2 - cc);
            ej_p = 0.5 * (1.0 / cp - cc);
            ej_n = 0.5 * (1.0 / cn - cc);
            ''',
            'calculate_partial_differentiations_diagonal',
        )(mean_x, cov_x,
          self._m_pos, self._c_pos,
          self._m_neg, self._c_neg)

        # Gradients with respect to the inputs (zero outside the margin).
        ge_m, ge_c = cuda.elementwise(
            'T dp, T dn, T dedi_p, T dedi_n, float32 l, float32 m, '
            'T kl_p, T kl_n',
            'T ge_m, T ge_c',
            '''
            ge_m = 0;
            ge_c = 0;
            if ((kl_p - kl_n) < m) {
                ge_m += l * (dp + -dn);
                ge_c += l * (-dedi_p + dedi_n);
            }
            ''',
            'calculate_error_diagonal',
        )(delta_pos, delta_neg, dEdi_pos, dEdi_neg,
          gloss, margin, kl_pos, kl_neg)

        # Scatter-add into the parameter gradients.  ``raw`` arrays are
        # indexed by hand; ``i`` is the element index provided by the
        # elementwise kernel and ``c`` is the row stride.
        cuda.elementwise(
            'raw T dp, raw T dn, raw T dedj_p, raw T dedj_n, S kp, S kn, '
            'int32 c, float32 l, float32 m, T kl_p, T kl_n',
            'raw T gM, raw T gC',
            '''
            if ((kl_p - kl_n) < m) {
                for (int j = 0; j < c; ++j) {
                    atomicAdd(&gM[kp * c + j], l * (-dp[i * c + j]));
                    atomicAdd(&gC[kp * c + j], l * (-dedj_p[i * c + j]));
                    atomicAdd(&gM[kn * c + j], l * dn[i * c + j]);
                    atomicAdd(&gC[kn * c + j], l * dedj_n[i * c + j]);
                }
            }
            ''',
            'accumlate_loss_diagonal',
        )(delta_pos, delta_neg, dEdj_pos, dEdj_neg,
          self._pos_indexes[:, cupy.newaxis],
          self._neg_indexes[:, cupy.newaxis],
          n_in, gloss, margin, kl_pos, kl_neg,
          self.gM, self.gC)

    elif self._covariance_type == CovarianceType.spherical:
        # NOTE(review): unlike the diagonal branch, the KL terms are used
        # without reshaping here; presumably they are already
        # broadcast-compatible in the spherical case -- confirm.
        kl_pos = self._kl_pos
        kl_neg = self._kl_neg

        delta_pos, delta_neg = cuda.elementwise(
            'T mx, T cx, T mp, T mn',
            'T dp, T dn',
            '''
            dp = (1.0 / cx) * (mx - mp);
            dn = (1.0 / cx) * (mx - mn);
            ''',
            'calculate_delta_prime_spherical',
        )(mean_x, cov_x, self._m_pos, self._m_neg)

        # Row-wise squared norms of the deltas, shaped as columns.
        ddp = cupy.sum(delta_pos * delta_pos, axis=1)[cupy.newaxis, :].T
        ddn = cupy.sum(delta_neg * delta_neg, axis=1)[cupy.newaxis, :].T

        dEdi_pos, dEdi_neg = cuda.elementwise(
            'T cx, T cp, T cn, T ddp, T ddn, int32 in',
            'T ep, T en',
            '''
            ep = 0.5 * (in * cp / (cx * cx) + ddp - (1.0 / cx));
            en = 0.5 * (in * cn / (cx * cx) + ddn - (1.0 / cx));
            ''',
            'calculate_dEdi_spherical',
        )(cov_x, self._c_pos, self._c_neg, ddp, ddn, n_in)

        dEdj_pos, dEdj_neg = cuda.elementwise(
            'T cx, T cp, T cn, int32 in',
            'T ep, T en',
            '''
            ep = 0.5 * (1.0 / cp * in - 1.0 / cx * in);
            en = 0.5 * (1.0 / cn * in - 1.0 / cx * in);
            ''',
            'calculate_dEdj_spherical',
        )(cov_x, self._c_pos, self._c_neg, n_in)

        # Gradients with respect to the inputs (zero outside the margin).
        ge_m = cuda.elementwise(
            'T dp, T dn, float32 l, float32 m, T kl_p, T kl_n',
            'T ge_m',
            '''
            if ((kl_p - kl_n) < m) {
                ge_m = l * (dp + -dn);
            } else {
                ge_m = 0;
            }
            ''',
            'calculate_gradient_of_mean_spherical',
        )(delta_pos, delta_neg, gloss, margin, kl_pos, kl_neg)

        ge_c = cuda.elementwise(
            'T dedi_p, T dedi_n, float32 l, float32 m, T kl_p, T kl_n',
            'T ge_c',
            '''
            if ((kl_p - kl_n) < m) {
                ge_c = l * (-dedi_p + dedi_n);
            } else {
                ge_c = 0;
            }
            ''',
            'calculate_gradient_of_covariance_spherical',
        )(dEdi_pos, dEdi_neg, gloss, margin, kl_pos, kl_neg)

        # Scatter-add into the parameter gradients.  gM is addressed with
        # row stride ``c``; gC is addressed with one entry per row.
        cuda.elementwise(
            'raw T dp, raw T dn, raw T dedj_p, raw T dedj_n, S kp, S kn, '
            'int32 c, float32 l, float32 m, T kl_p, T kl_n',
            'raw T gM, raw T gC',
            '''
            if ((kl_p - kl_n) < m) {
                for (int j = 0; j < c; ++j) {
                    atomicAdd(&gM[kp * c + j], l * (-dp[i * c + j]));
                    atomicAdd(&gM[kn * c + j], l * dn[i * c + j]);
                }
                atomicAdd(&gC[kp], l * (-dedj_p[i]));
                atomicAdd(&gC[kn], l * dedj_n[i]);
            }
            ''',
            'accumlate_loss_spherical',
        )(delta_pos, delta_neg, dEdj_pos, dEdj_neg,
          self._pos_indexes[:, cupy.newaxis],
          self._neg_indexes[:, cupy.newaxis],
          n_in, gloss, margin, kl_pos, kl_neg,
          self.gM, self.gC)

    else:
        # Fail loudly instead of hitting an UnboundLocalError at ``return``
        # after the cached KL terms have already been deleted.
        raise ValueError(
            'unsupported covariance type: {!r}'.format(
                self._covariance_type))

    # The cached KL terms are no longer needed after the backward pass.
    del self._kl_pos
    del self._kl_neg

    return ge_m, ge_c, None