def __call__(self, x, y, nr_docs):
    x, y, nr_docs = as_variable(x), as_variable(y), as_variable(nr_docs)
    det_a = self.ranker.max(x)
    sto_a = self.ranker.draw(x)
    self.report_scores(det_a, y, nr_docs, x.dtype)
    self.report_scores(sto_a, y, nr_docs, x.dtype, prefix='stochastic/')
    return as_variable(y)
def listpl(x, t, nr_docs, α=10.0):
    """
    The ListPL loss, a stochastic variant of ListMLE that in expectation
    approximates the true ListNet loss.

    :param x: The activation of the previous layer
    :type x: chainer.Variable

    :param t: The target labels
    :type t: chainer.Variable

    :param nr_docs: The number of documents per query
    :type nr_docs: chainer.Variable

    :param α: The temperature parameter of the Plackett-Luce distribution
    :type α: float

    :return: The loss
    :rtype: chainer.Variable
    """
    t, nr_docs = as_variable(t), as_variable(nr_docs)
    t = as_variable(t.data.astype(x.dtype))
    t = cf.log_softmax(t * α)

    indices = sample_without_replacement(t)
    x_hat = select_items_per_row(x, indices)

    # Compute MLE loss
    per_sample_loss = -cf.sum(x_hat - logcumsumexp(x_hat), axis=1)
    return cf.mean(per_sample_loss)
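# A minimal numpy sketch (not necessarily the library's own
# sample_without_replacement) of how a permutation can be drawn from a
# Plackett-Luce distribution given per-row log-probabilities: perturbing each
# log-probability with independent Gumbel noise and sorting in descending
# order yields a Plackett-Luce distributed permutation (the Gumbel trick).
import numpy as np


def gumbel_plackett_luce_sample(log_p):
    # log_p: (batch, n) array of log-probabilities per row
    gumbel = -np.log(-np.log(np.random.uniform(size=log_p.shape)))
    return np.argsort(-(log_p + gumbel), axis=1)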
def listmle(x, t, nr_docs):
    """
    The ListMLE loss as in Xia et al (2008), Listwise Approach to Learning to
    Rank - Theory and Algorithm.

    :param x: The activation of the previous layer
    :type x: chainer.Variable

    :param t: The target labels
    :type t: chainer.Variable

    :param nr_docs: The number of documents per query
    :type nr_docs: chainer.Variable

    :return: The loss
    :rtype: chainer.Variable
    """
    t, nr_docs = as_variable(t), as_variable(nr_docs)

    # Get the ground truth by sorting activations by the relevance labels
    indices = argsort(t, axis=1)
    x_hat = select_items_per_row(x, cf.flip(indices, axis=1))

    # Compute MLE loss
    per_sample_loss = -cf.sum(x_hat - logcumsumexp(x_hat), axis=1)
    return cf.mean(per_sample_loss)
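# A self-contained numpy sketch of the ListMLE negative log-likelihood for a
# single query, written in the standard suffix-sum formulation; the helpers
# above (logcumsumexp, select_items_per_row) are assumed to compute an
# equivalent cumulative log-sum-exp over the relevance-sorted scores.
import numpy as np


def listmle_single_query(scores, relevance):
    order = np.argsort(relevance)[::-1]      # ground-truth permutation
    s = scores[order]                        # scores sorted by relevance
    # -log P(order | scores) = -sum_i [ s_i - log sum_{j >= i} exp(s_j) ]
    suffix_logsumexp = np.log(np.cumsum(np.exp(s[::-1]))[::-1])
    return -np.sum(s - suffix_logsumexp)


print(listmle_single_query(np.array([2.0, 0.5, 1.0]), np.array([2, 0, 1])))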
def uniform(self, x):
    pred = self._predict(x)
    xp = cuda.get_array_module(pred)
    log_p = F.log_softmax(
        as_variable(xp.ones(pred.shape, dtype=pred.dtype)))
    action = as_variable(sample_without_replacement(log_p))
    return self._cut(action)
def test_bandify_update():
    policy = ADFUCBPolicy(3)
    mcb = MultiClassBandify(policy)
    mcb.update_policy(policy)

    # Generate minibatch
    np.random.seed(42)
    x = as_variable(
        np.array([[[2.0, 1.0, 3.0], [-2.0, 4.0, -1.0]],
                  [[-3.0, 0.0, -1.0], [1.0, 0.0, 1.0]],
                  [[-1.0, 0.01, -1.0], [0.1, 2.0, 2.0]]]))
    y = as_variable(np.array([0, 1, 1], dtype=np.int32))

    # Drawing at this point should return the default action
    expected = np.array([1, 0, 1])
    assert_allclose(policy.draw(x).data, expected)

    # Run multi class bandify chain
    np.random.seed(42)
    for _ in range(100):
        _, a, log_p, r = mcb(x, y)

    # Drawing at this point should be perfect
    expected = np.array([0, 1, 1])
    assert_allclose(policy.draw(x).data, expected)
def circular_correlation(self, left_x, right_x):
    """
    Computes the circular correlation of two vectors a and b via their fast
    Fourier transforms. In python code: ifft(np.conj(fft(a)) * fft(b)).real

    The complex product is expanded below by hand using
    (a - j * b) * (c + j * d) = (ac + bd) + j * (ad - bc).

    :param left_x: The left input vectors
    :param right_x: The right input vectors
    :return: The circular correlation of left_x and right_x
    """
    left_x_real = left_x
    left_x_imag = chainer.as_variable(
        self.xp.zeros_like(left_x_real, dtype=self.xp.float32))
    left_x_fft_real, left_x_fft_imag = functions.fft(
        (left_x_real, left_x_imag))

    right_x_real = right_x
    right_x_imag = chainer.as_variable(
        self.xp.zeros_like(right_x_real, dtype=self.xp.float32))
    right_x_fft_real, right_x_fft_imag = functions.fft(
        (right_x_real, right_x_imag))

    prod_fft_real = left_x_fft_real * right_x_fft_real \
        + left_x_fft_imag * right_x_fft_imag
    prod_fft_imag = left_x_fft_real * right_x_fft_imag \
        - left_x_fft_imag * right_x_fft_real

    ifft_real, _ = functions.ifft((prod_fft_real, prod_fft_imag))
    return ifft_real
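# A self-contained numpy check of the identity used above: circular
# correlation equals ifft(conj(fft(a)) * fft(b)).real, which matches an
# explicit shift-and-multiply sum (array names here are illustrative only).
import numpy as np

a = np.random.randn(8)
b = np.random.randn(8)

via_fft = np.fft.ifft(np.conj(np.fft.fft(a)) * np.fft.fft(b)).real
explicit = np.array([np.sum(a * np.roll(b, -k)) for k in range(len(a))])
print(np.allclose(via_fft, explicit))  # True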
def ranknet(x, t, nr_docs):
    """
    The RankNet loss as in Burges et al (2005), Learning to Rank using
    Gradient Descent.

    :param x: The activation of the previous layer
    :type x: chainer.Variable

    :param t: The target labels
    :type t: chainer.Variable

    :param nr_docs: The number of documents per query
    :type nr_docs: chainer.Variable

    :return: The RankNet loss
    :rtype: chainer.Variable
    """
    x, t, nr_docs = as_variable(x), as_variable(t), as_variable(nr_docs)
    t = as_variable(t.data.astype(x.dtype))

    x_ij = _tiled_diff(x)
    t_ij = _tiled_diff(t)
    p_t_ij = cf.sigmoid(t_ij)

    # This loss is a simplified sigmoid cross entropy described in the paper
    c_ij = -p_t_ij * x_ij + cf.log(1.0 + cf.exp(x_ij))
    loss = cf.mean(c_ij)
    return loss
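# A small numpy check of the simplification used above: the pairwise cost
# -P_ij * s_ij + log(1 + exp(s_ij)) equals the full sigmoid cross entropy
# -P_ij * log(sigmoid(s_ij)) - (1 - P_ij) * log(1 - sigmoid(s_ij)).
import numpy as np

s = np.random.randn(5)                          # score differences s_ij
p = 1.0 / (1.0 + np.exp(-np.random.randn(5)))   # target probabilities P_ij

simplified = -p * s + np.log1p(np.exp(s))
sig = 1.0 / (1.0 + np.exp(-s))
full = -p * np.log(sig) - (1 - p) * np.log(1 - sig)
print(np.allclose(simplified, full))            # True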
def ndcg(ranking, relevance_scores, nr_docs=None, k=0, exp=True):
    """
    Computes the nDCG@k for a given list of true relevance labels
    (relevance_scores) and a given permutation of documents (ranking).

    :param ranking: The ranking of the documents
    :type ranking: chainer.Variable

    :param relevance_scores: The ground truth relevance labels
    :type relevance_scores: chainer.Variable

    :param nr_docs: A vector of the nr_docs per row
    :type nr_docs: chainer.Variable

    :param k: The cut-off point (if set to 0, it does not cut off; if set to
              smaller than 0, it computes all possible cut-offs and returns an
              array)
    :type k: int

    :param exp: Set to True to use the exponential variant of nDCG, which puts
                a stronger emphasis on retrieving relevant documents
    :type exp: bool

    :return: The nDCG@k value
    :rtype: chainer.Variable
    """
    xp = cuda.get_array_module(relevance_scores)
    optimal_ranking = as_variable(
        xp.fliplr(xp.argsort(relevance_scores.data, axis=1)))
    _dcg = dcg(ranking, relevance_scores, nr_docs, k, exp).data
    _idcg = dcg(optimal_ranking, relevance_scores, nr_docs, k, exp).data
    _idcg[_idcg == 0.0] = 1.0
    return as_variable(_dcg / _idcg)
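# A self-contained numpy illustration of the exponential DCG/nDCG formula the
# dcg helper above is assumed to implement:
# DCG = sum_i (2^rel_i - 1) / log2(i + 2) over the ranked labels, nDCG = DCG / IDCG.
import numpy as np

relevance = np.array([3.0, 2.0, 3.0, 0.0, 1.0])
ranking = np.array([3, 0, 1, 2, 4])           # a suboptimal predicted order


def dcg_np(rels):
    return np.sum((2.0 ** rels - 1.0) / np.log2(np.arange(len(rels)) + 2.0))


dcg_val = dcg_np(relevance[ranking])
idcg_val = dcg_np(np.sort(relevance)[::-1])
print(dcg_val / idcg_val)                     # nDCG of the predicted ranking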
def multibox_loss(mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k, comm=None):
    mb_locs = chainer.as_variable(mb_locs)
    mb_confs = chainer.as_variable(mb_confs)
    gt_mb_locs = chainer.as_variable(gt_mb_locs)
    gt_mb_labels = chainer.as_variable(gt_mb_labels)

    xp = chainer.backends.cuda.get_array_module(gt_mb_labels.array)

    with chainer.backends.cuda.get_device_from_array(gt_mb_labels.array):
        positive = gt_mb_labels.array > 0
        n_positive = positive.sum()

        if comm:
            n_positive = comm.allreduce_obj(n_positive) / comm.size

        if n_positive == 0:
            z = chainer.Variable(xp.zeros((), dtype=np.float32))
            return z, z

        loc_loss = F.huber_loss(mb_locs, gt_mb_locs, 1, reduce='no')
        loc_loss = F.sum(loc_loss, axis=-1)
        loc_loss *= positive.astype(loc_loss.dtype)
        loc_loss = F.sum(loc_loss) / n_positive

        conf_loss = _elementwise_softmax_cross_entropy(mb_confs, gt_mb_labels)
        hard_negative = _hard_negative(conf_loss.array, positive, k)
        conf_loss *= xp.logical_or(
            positive, hard_negative).astype(conf_loss.dtype)
        conf_loss = F.sum(conf_loss) / n_positive

    return loc_loss, conf_loss
def test_update():
    policy = ThompsonPolicy(4, 6)

    # Generate minibatch
    np.random.seed(42)
    x = as_variable(
        np.array([[1.0, 2.0, 3.0, 3.0, -2.0, -1.0],
                  [2.0, 3.0, 1.0, -1.0, -3.0, -2.0],
                  [-1.0, -2.0, -1.0, 1.0, 3.0, 1.0],
                  [-1.0, -2.0, 1.0, 1.0, 3.0, 1.0]]))
    y = as_variable(np.array([2, 1, 0, 3]))

    # Drawing at this point should return the default action
    expected = np.array([0, 1, 2, 3])
    assert_allclose(policy.draw(x).data, expected)

    # Perfect update
    log_p = as_variable(np.zeros(y.shape))
    for _ in range(100):
        a = as_variable(np.random.randint(4, size=y.shape))
        r = (1.0 * (a.data == y.data))
        policy.update(x, a, log_p, as_variable(r))

    # Drawing at this point should be perfect
    expected = np.array([2, 1, 0, 3])
    assert_allclose(policy.draw(x).data, expected)
def forward(self, inputs):
    xp = cuda.get_array_module(*inputs)
    ranking, relevance_labels = inputs

    # Computing nDCG on an empty array should just return 0.0
    if ranking.shape[1] == 0:
        return xp.zeros(ranking.shape[0]),

    # Top-k cutoff
    last = ranking.shape[1]
    if self.k > 0:
        last = min(self.k, last)

    # For the rankings, compute the relevance labels in order
    relevance = select_items_per_row(as_variable(relevance_labels),
                                     as_variable(ranking))
    relevance = relevance[:, :last].data.astype(dtype=xp.float32)

    # Compute numerator of DCG formula
    if self.exp:
        numerator = (2.0 ** relevance) - 1.0
    else:
        numerator = relevance

    # Compute denominator of DCG formula
    arange = xp.broadcast_to(2.0 + xp.arange(relevance.shape[1]),
                             relevance.shape)
    denominator = xp.log2(arange)

    if self.k >= 0:
        return xp.asarray(xp.sum(numerator / denominator, axis=1)),
    else:
        return xp.asarray(xp.cumsum(numerator / denominator, axis=1)),
def add(self, r_hat):
    xp = cuda.get_array_module(r_hat)
    weights = self.alpha**(xp.arange(r_hat.shape[0], 0, -1.0) - 1.0)
    summation = (1 - self.alpha) * F.sum(as_variable(weights) * r_hat, axis=0)

    # Compute exponential moving variance
    batch = (1 - self.alpha) * F.cumsum(
        as_variable(weights) * r_hat,
        axis=0) + (self.alpha**r_hat.shape[0]) * self.biased_mean
    batch = batch.data / (1 - self.alpha**(
        self.n + 1.0 + xp.arange(0.0, r_hat.shape[0], 1.0)))
    batch_s = xp.roll(batch, 1, axis=0)
    batch_s[0] = self.mean
    diff = r_hat - batch_s
    diff = self.alpha * (diff**2)
    var = (1 - self.alpha) * F.sum(as_variable(weights) * diff, axis=0)
    self.var = (self.alpha**r_hat.shape[0]) * self.var + var.data

    # Compute exponential moving average
    self.biased_mean = (self.alpha**r_hat.shape[0]) * self.biased_mean \
        + summation.data
    self.n += r_hat.shape[0]
    self.bias_correction_factor *= self.alpha**r_hat.shape[0]
    self.mean = self.biased_mean / (1 - self.bias_correction_factor)
    return self.mean
def get_longlinear(batchsize):
    model = LongLinear()
    x = np.random.uniform(size=(batchsize, 1000)).astype('f')
    x = chainer.as_variable(x)
    t = np.random.uniform(size=(batchsize, 1000)).astype('f')
    t = chainer.as_variable(t)
    return [x, t], model
def test_unpad_basic():
    x = as_variable(np.array([[3, 2, 0, 1], [0, 1, 3, 2]]))
    print(x)
    nr_docs = as_variable(np.array([2, 3]))
    y = unpad(x, nr_docs)
    expected = as_variable(np.array([[0, 1, 3, 2], [0, 1, 2, 3]], 'f'))
    assert_allclose(y.data, expected.data)
def single_backward(fn, indices, retained_inputs, o_grads, i_grads,
                    outputs=None):
    input_vars = [chainer.as_variable(z) for z in retained_inputs]
    fn.inputs = tuple([z.node for z in input_vars])
    if outputs is not None:
        output_vars = [chainer.as_variable(z) for z in outputs]
        fn.outputs = tuple([weakref.ref(z.node) for z in output_vars])
        fn._retained_output_data = tuple(outputs)

    i_grads = fn.backward_accumulate(indices, o_grads, i_grads)
    if len(i_grads) != len(indices):
        i_grads_ = []
        for i in indices:
            i_grads_.append(i_grads[i])
        i_grads = i_grads_

    input_vars = None
    fn.inputs = None
    output_vars = None
    fn.outputs = None
    fn._retained_output_data = None

    def unwrap(v):
        if isinstance(v, chainer.Variable):
            return v.data
        else:
            return v

    return (unwrap(v) for v in i_grads)
def test_unpad_single_column():
    x = as_variable(np.array([[1], [2], [4], [0], [3]]))
    print(x)
    nr_docs = as_variable(np.array([1, 1, 1, 1, 1]))
    y = unpad(x, nr_docs)
    expected = as_variable(np.array([[1], [2], [4], [0], [3]], 'f'))
    assert_allclose(y.data, expected.data)
def test_unpad_single_row():
    x = as_variable(np.array([[1, 2, 4, 0, 3]]))
    print(x)
    nr_docs = as_variable(np.array([3]))
    y = unpad(x, nr_docs)
    expected = as_variable(np.array([[1, 2, 0, 4, 3]], 'f'))
    assert_allclose(y.data, expected.data)
def __call__(self, x, adj):
    # x (batch_size, ): atom id array
    h = chainer.as_variable(x)
    h = self.embed_model.embedding(h)

    # add gaussian noise
    if chainer.config.train:
        h += (self.xp.random.randn(*h.shape) * self.word_channel_stds
              * self.hyperparams.feature_noise_scale)

    adj = chainer.as_variable(adj)
    sum_log_det_jacobian_x = chainer.as_variable(
        self.xp.zeros([h.shape[0]], dtype=self.xp.float32))
    sum_log_det_jacobian_adj = chainer.as_variable(
        self.xp.zeros([h.shape[0]], dtype=self.xp.float32))

    # forward step for channel-coupling layers
    for i in range(self.hyperparams.num_coupling["feature"]):
        h, log_det_jacobians = self.clinks[i](h, adj)
        sum_log_det_jacobian_x += log_det_jacobians

    # add uniform noise to adjacency tensors
    if chainer.config.train:
        adj += self.xp.random.uniform(0, 0.9, adj.shape)

    # forward step for adjacency-coupling layers
    for i in range(self.hyperparams.num_coupling["feature"],
                   len(self.clinks)):
        adj, log_det_jacobians = self.clinks[i](adj)
        sum_log_det_jacobian_adj += log_det_jacobians

    adj = F.reshape(adj, (adj.shape[0], -1))
    h = F.reshape(h, (h.shape[0], -1))
    out = [h, adj]
    return out, [sum_log_det_jacobian_x, sum_log_det_jacobian_adj]
def get_diamond(batchsize):
    model = DiamondClassifier(784, 10)
    x = np.random.uniform(size=(batchsize, 784)).astype('f')
    x = chainer.as_variable(x)
    t = np.random.randint(size=(batchsize,), low=0, high=10) \
        .astype(np.int32)
    t = chainer.as_variable(t)
    return [x, t], model
def _noise_generate(self, batchsize):
    vis_noise = xp.random.uniform(-1, 1,
                                  (batchsize, 128)).astype(xp.float32)
    zvis = chainer.as_variable(vis_noise)
    ztag = self._get_fake_tag_batch(batchsize, self.dim, threshold=0.75)
    ztag = chainer.as_variable(ztag)
    return zvis, ztag
def get_convrelu(batchsize):
    model = ConvReLUClassifier()
    x = np.random.uniform(size=(batchsize, 10000)).astype('f')
    x = chainer.as_variable(x)
    t = np.random.randint(size=(batchsize, ), low=0, high=10).astype(np.int32)
    t = chainer.as_variable(t)
    return [x, t], model
def test_inv_select_items_none():
    idx = as_variable(np.array([[0, 1, 2, 3], [0, 1, 2, 3]], dtype=np.int32))
    val = as_variable(np.array([[0.5, 3.14, 0.0, -9.9],
                                [1.0, -1.0, 1.0, 4.0]]))
    out = inverse_select_items_per_row(val, idx)
    assert_allclose(out.data, np.array([[], []], dtype=np.int32))
def _tiles(x):
    xp = cuda.get_array_module(x)
    x = x.data
    x_i = xp.reshape(x, (x.shape[0], x.shape[1], 1))
    x_j = xp.reshape(x, (x.shape[0], 1, x.shape[1]))
    x_i = xp.broadcast_to(x_i, (x.shape[0], x.shape[1], x.shape[1]))
    x_j = xp.broadcast_to(x_j, (x.shape[0], x.shape[1], x.shape[1]))
    return as_variable(x_i), as_variable(x_j)
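# A small numpy illustration of what the tiling above produces: for each row,
# x_i and x_j broadcast the scores into (n, n) matrices, so x_i - x_j gives
# every pairwise score difference in a single subtraction.
import numpy as np

x = np.array([[1.0, 2.0, 4.0]])
x_i = np.broadcast_to(x[:, :, None], (1, 3, 3))
x_j = np.broadcast_to(x[:, None, :], (1, 3, 3))
print(x_i - x_j)
# [[[ 0. -1. -3.]
#   [ 1.  0. -2.]
#   [ 3.  2.  0.]]]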
def test_select_items_identity():
    idx = as_variable(np.array([[0, 1, 2, 3], [0, 1, 2, 3]]))
    val = as_variable(np.array([[0.5, 3.14, 0.0, -9.9],
                                [1.0, -1.0, 1.0, 4.0]]))
    out = select_items_per_row(val, idx)
    assert_allclose(out.data, val.data)
def get_resnet152(batchsize):
    model = L.ResNet152Layers(pretrained_model=None)
    model = Wrapper(model, 'fc6')
    x = np.random.uniform(size=(batchsize, 3, 224, 224)).astype('f')
    x = chainer.as_variable(x)
    t = np.random.randint(size=(batchsize, ), low=0,
                          high=1000).astype(np.int32)
    t = chainer.as_variable(t)
    return [x, t], model
def get_pspnet(batchsize):
    model = pspnet()
    x = np.random.uniform(size=(batchsize, 3, 713, 713)).astype('f')
    x = chainer.as_variable(x)
    t = np.random.randint(size=(batchsize, 713, 713,), low=0, high=10) \
        .astype(np.int32)
    t = chainer.as_variable(t)
    return [x, t], model
def __call__(self, xs, ys, nr_docs):
    prediction = self.ranker(xs)
    loss = self.loss_fn(prediction, ys, nr_docs)
    report({"loss": loss})

    xp = cuda.get_array_module(prediction)
    ranking = as_variable(xp.fliplr(xp.argsort(prediction.data, axis=1)))
    ndcg_score = ndcg(ranking, as_variable(ys), as_variable(nr_docs))
    report({"ndcg": F.mean(ndcg_score)})

    return loss
def get_unet(batchsize):
    model = UNET()
    x = np.random.uniform(size=(batchsize, 1, 572, 572)).astype('f')
    x = chainer.as_variable(x)
    t = np.random.randint(size=(batchsize, 388, 388), low=0,
                          high=2).astype(np.int32)
    t = chainer.as_variable(t)
    return [x, t], model
def get_segnet(batchsize):
    model = SegNetBasic(n_class=17)
    model = PixelwiseSoftmaxClassifier(model, class_weight=np.ones(17))
    x = np.random.uniform(size=(batchsize, 3, 1024, 1024)).astype('f')
    x = chainer.as_variable(x)
    t = np.random.randint(size=(batchsize, 1024, 1024), low=0, high=10) \
        .astype(np.int32)
    t = chainer.as_variable(t)
    return [x, t], model
def forward(self, inputs):
    loc, scale = inputs
    xp = backend.get_array_module(loc)
    eps = xp.random.randn(*loc.shape)
    z = as_variable(loc).array + eps * as_variable(scale).array
    self.retain_inputs((0, 1))
    self.retain_outputs((0, ))
    return xp.array(z, dtype='f'),
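# A minimal numpy sketch of the reparameterization used above: a sample from
# N(loc, scale^2) is expressed as loc + eps * scale with eps ~ N(0, 1), so
# gradients can flow through loc and scale while the noise eps stays fixed.
import numpy as np

loc = np.array([0.0, 2.0])
scale = np.array([1.0, 0.5])
eps = np.random.randn(*loc.shape)
z = loc + eps * scale        # distributed as N(loc, scale**2)
print(z)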
def __init__(self, loc, **kwargs):
    scale_tril = None
    if kwargs:
        scale_tril, = argument.parse_kwargs(
            kwargs, ('scale_tril', scale_tril))
    if scale_tril is None:
        raise ValueError("`scale_tril` must have a value.")
    self.loc = chainer.as_variable(loc)
    self.scale_tril = chainer.as_variable(scale_tril)
    self.d = self.scale_tril.shape[-1]
def __init__(self, p=None, logit=None):
    super(Bernoulli, self).__init__()
    if not (p is None) ^ (logit is None):
        raise ValueError(
            "Either `p` or `logit` (not both) must have a value.")
    with chainer.using_config('enable_backprop', True):
        if p is None:
            self.logit = chainer.as_variable(logit)
            self.p = sigmoid.sigmoid(self.logit)
        else:
            self.p = chainer.as_variable(p)
            self.logit = exponential.log(self.p) \
                - logarithm_1p.log1p(-self.p)
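# A quick numpy check of the identity used above: log(p) - log1p(-p) is the
# inverse of the sigmoid, so converting p to a logit and back recovers p.
import numpy as np

p = np.array([0.1, 0.5, 0.9])
logit = np.log(p) - np.log1p(-p)
print(np.allclose(1.0 / (1.0 + np.exp(-logit)), p))  # True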
def broadcast(*args):
    """Broadcast given variables.

    Args:
        args (:class:`~chainer.Variable` or :class:`numpy.ndarray` \
        or :class:`cupy.ndarray`):
            Input variables to be broadcasted. Each dimension of the shapes \
            of the input variables must have the same size.

    Returns:
        ~chainer.Variable: :class:`~chainer.Variable` or tuple of \
        :class:`~chainer.Variable` objects which are broadcasted \
        from given arguments.

    .. admonition:: Example

        >>> x = np.random.uniform(0, 1, (3, 2)).astype(np.float32)
        >>> y = F.broadcast(x)
        >>> np.all(x == y.data)
        True
        >>> z = np.random.uniform(0, 1, (3, 2)).astype(np.float32)
        >>> y, w = F.broadcast(x, z)
        >>> np.all(x == y.data) & np.all(z == w.data)
        True

    """
    if len(args) == 1:
        return chainer.as_variable(args[0])
    return Broadcast().apply(args)
def cast(x, typ):
    """Cast an input variable to a given type.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`):
            Input variable to be casted. A \
            :math:`(s_1, s_2, ..., s_N)`-shaped array.
        typ (:class:`str` of dtype or :class:`numpy.dtype`):
            Typecode or data type to cast.

    Returns:
        ~chainer.Variable: Variable holding a casted array.

    .. admonition:: Example

        >>> x = np.arange(0, 3, dtype=np.float64)
        >>> x.dtype
        dtype('float64')
        >>> y = F.cast(x, np.float32)
        >>> y.dtype
        dtype('float32')
        >>> y = F.cast(x, 'float16')
        >>> y.dtype
        dtype('float16')

    """
    if x.dtype == typ:
        if not chainer.config.enable_backprop:
            return chainer.as_variable(x)
    return Cast(typ).apply((x,))[0]
def sign(x):
    """Elementwise sign function.

    For a given input :math:`x`, this function returns :math:`sgn(x)`
    defined as

    .. math::

        sgn(x) = \\left \\{ \\begin{array}{cc}
        -1 & {\\rm if~x < 0} \\\\
        0 & {\\rm if~x = 0} \\\\
        1 & {\\rm if~x > 0} \\\\
        \\end{array} \\right.

    .. note::

        The gradient of this function is ``None`` everywhere and therefore
        unchains the computational graph.

    Args:
        x (~chainer.Variable): Input variable for which the sign is computed.

    Returns:
        ~chainer.Variable: Output variable.
    """
    if isinstance(x, chainer.variable.Variable):
        x = x.array
    xp = backend.get_array_module(x)
    return chainer.as_variable(utils.force_array(xp.sign(x)))
def broadcast_to(x, shape):
    """Broadcast a given variable to a given shape.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Input variable to be broadcasted. A \
            :math:`(s_1, s_2, ..., s_N)`-shaped float array.
        shape (tuple): Tuple of :class:`int` of the shape of the \
            output variable.

    Returns:
        ~chainer.Variable: Output variable broadcasted to the given shape.

    .. admonition:: Example

        >>> x = np.arange(0, 3)
        >>> x
        array([0, 1, 2])
        >>> y = F.broadcast_to(x, (3, 3))
        >>> y.data
        array([[0, 1, 2],
               [0, 1, 2],
               [0, 1, 2]])

    """
    if x.shape == shape:
        return chainer.as_variable(x)
    y, = BroadcastTo(shape).apply((x,))
    return y
def sum_to(x, shape):
    """Sum elements along axes to output an array of a given shape.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`): Input variable.
        shape (tuple of int): The target shape.

    Returns:
        ~chainer.Variable: Output variable of shape ``shape``.

    .. admonition:: Example

        >>> x = np.array([[1., 2., 3.], [4., 5., 6.]])
        >>> x
        array([[1., 2., 3.],
               [4., 5., 6.]])
        >>> y = F.sum_to(x, (1, 3))
        >>> y
        variable([[5., 7., 9.]])
        >>> z = F.sum_to(x, (2, 1))
        >>> z
        variable([[ 6.],
                  [15.]])

    """
    if x.shape == shape:
        return chainer.as_variable(x)
    y, = SumTo(shape).apply((x,))
    return y
def prob(self, x):
    x = chainer.as_variable(x)
    if self._is_gpu:
        valid = cuda.cupy.bitwise_or(x.array == 0, x.array == 1)
    else:
        valid = numpy.bitwise_or(x.array == 0, x.array == 1)
    ret = x * self.p + (1 - x) * (1 - self.p)
    return ret * valid
def __init__(self, p=None, **kwargs):
    logit = None
    if kwargs:
        logit, = argument.parse_kwargs(
            kwargs, ('logit', logit))
    if not (p is None) ^ (logit is None):
        raise ValueError(
            "Either `p` or `logit` (not both) must have a value.")
    with chainer.using_config('enable_backprop', True):
        if p is None:
            logit = chainer.as_variable(logit)
            self.__log_p = log_softmax.log_softmax(logit, axis=-1)
            self.__p = exponential.exp(self.__log_p)
        else:
            self.__p = chainer.as_variable(p)
            self.__log_p = exponential.log(self.__p)
def forward(self, input, target, mask):
    input = chainer.as_variable(input)
    target = chainer.as_variable(target)
    mask = chainer.as_variable(mask)

    output = self.predictor(input)
    output = output * mask
    target = target * mask

    d_fake = self.discriminator(input, output)
    d_real = self.discriminator(input, target)

    loss = {
        'predictor': self._loss_predictor(self.predictor, output, target,
                                          d_fake),
        'discriminator': self._loss_discriminator(self.discriminator,
                                                  d_real, d_fake),
    }
    return loss
def log_prob(self, x):
    x = chainer.as_variable(x)
    logp = self._log_alpha \
        + self.alpha * self._log_scale \
        - (self.alpha + 1) * exponential.log(x)
    xp = logp.xp
    return where.where(
        utils.force_array(x.data >= self.scale.data),
        logp, xp.array(-xp.inf, logp.dtype))
def __init__(self, loc, scale=None, **kwargs):
    super(Normal, self).__init__()
    log_scale = None
    if kwargs:
        log_scale, = argument.parse_kwargs(
            kwargs, ('log_scale', log_scale))
    if not (scale is None) ^ (log_scale is None):
        raise ValueError(
            "Either `scale` or `log_scale` (not both) must have a value.")
    self.loc = chainer.as_variable(loc)
    with chainer.using_config('enable_backprop', True):
        if scale is None:
            self.__log_scale = chainer.as_variable(log_scale)
            self.__scale = exponential.exp(self.log_scale)
        else:
            self.__scale = chainer.as_variable(scale)
            self.__log_scale = exponential.log(self.scale)
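# A hedged usage sketch of the two parameterizations handled above: passing
# `scale` directly or `log_scale` as a keyword should describe the same
# distribution, since scale = exp(log_scale). Written against
# chainer.distributions.Normal, which this constructor appears to belong to.
import numpy as np
from chainer.distributions import Normal

loc = np.zeros(3, dtype=np.float32)
d1 = Normal(loc, scale=np.full(3, 2.0, dtype=np.float32))
d2 = Normal(loc, log_scale=np.full(3, np.log(2.0), dtype=np.float32))
print(np.allclose(d1.scale.array, d2.scale.array))  # True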
def log_prob(self, x):
    x = chainer.as_variable(x)
    logp = (self.a - 1) * exponential.log(x) \
        + (self.b - 1) * exponential.log(1 - x) \
        - _lbeta(self.a, self.b)
    xp = logp.xp
    return where.where(
        utils.force_array((x.array >= 0) & (x.array <= 1)),
        logp, xp.array(-xp.inf, logp.dtype))
def dropout(x, ratio=.5, **kwargs):
    """dropout(x, ratio=.5)

    Drops elements of input variable randomly.

    This function drops input elements randomly with probability ``ratio`` and
    scales the remaining elements by factor ``1 / (1 - ratio)``. In testing
    mode, it does nothing and just returns ``x``.

    .. warning::

        ``train`` argument is not supported anymore since v2.
        Instead, use ``chainer.using_config('train', boolean)``.
        See :func:`chainer.using_config`.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Input variable. A :math:`(s_1, s_2, ..., s_N)` -shaped float array.
        ratio (float):
            Dropout ratio. The ``ratio`` must be ``0.0 <= ratio < 1.0``.

    Returns:
        ~chainer.Variable: Output variable.

    See the paper by G. Hinton: `Improving neural networks by preventing \
    co-adaptation of feature detectors <https://arxiv.org/abs/1207.0580>`_.

    .. admonition:: Example

        >>> x = np.array([[-1, 0], [2, -3], [-2, 1]], np.float32)
        >>> with chainer.using_config('train', True):
        ...     y = F.dropout(x)
        >>> y.data
        array([[-2.,  0.],
               [ 4., -6.],
               [-0.,  2.]], dtype=float32)
        >>> with chainer.using_config('train', True):
        ...     y = F.dropout(x, ratio=0.0)  # returns original input if ratio=0.0
        >>> (x == y.data).all()
        True
        >>> with chainer.using_config('train', False):
        ...     y = F.dropout(x)  # dropout in test mode returns original input
        >>> (x == y.data).all()
        True

    """
    argument.check_unexpected_kwargs(
        kwargs, train='train argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    if configuration.config.train:
        return Dropout(ratio).apply((x,))[0]
    return chainer.as_variable(x)
def prob(self, x):
    x = chainer.as_variable(x)
    prob = x * self.p + (1 - x) * (1 - self.p)
    if self.binary_check:
        if self._is_gpu:
            valid = cuda.cupy.bitwise_or(x.array == 0, x.array == 1)
        else:
            valid = numpy.bitwise_or(x.array == 0, x.array == 1)
        prob *= valid
    return prob
def __init__(self, **kwargs):
    low, high, loc, scale = None, None, None, None
    if kwargs:
        low, high, loc, scale = argument.parse_kwargs(
            kwargs, ('low', low), ('high', high), ('loc', loc),
            ('scale', scale))
    if not (low is None or high is None) ^ (loc is None or scale is None):
        raise ValueError(
            "Either `low, high` or `loc, scale` (not both) must have a "
            "value.")

    with chainer.using_config('enable_backprop', True):
        if low is None:
            self.__loc = chainer.as_variable(loc)
            self.__scale = chainer.as_variable(scale)
            self.__low = self.__loc
            self.__high = self.__loc + self.__scale
        else:
            self.__low = chainer.as_variable(low)
            self.__high = chainer.as_variable(high)
            self.__loc = self.__low
            self.__scale = self.__high - self.__low
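# A small numpy illustration of the mapping handled above: the `loc, scale`
# parameterization corresponds to the interval [loc, loc + scale], i.e.
# low = loc and high = loc + scale.
import numpy as np

loc, scale = 1.0, 2.0
samples = loc + scale * np.random.uniform(size=1000)
print(samples.min() >= loc, samples.max() <= loc + scale)  # True True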
def reshape(x, shape):
    """Reshapes an input variable without copy.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable.
        shape (:class:`tuple` of :class:`int` s):
            Expected shape of the output array. The number of elements which
            the array of ``shape`` contains must be equal to that of input
            array. One shape dimension can be -1. In this case, the value is
            inferred from the length of the array and remaining dimensions.

    Returns:
        ~chainer.Variable:
            Variable that holds a reshaped version of the input variable.

    .. seealso:: :func:`numpy.reshape`, :func:`cupy.reshape`

    .. admonition:: Example

        >>> x = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
        >>> y = F.reshape(x, (8,))
        >>> y.shape
        (8,)
        >>> y.data
        array([1, 2, 3, 4, 5, 6, 7, 8])
        >>> y = F.reshape(x, (4, -1))  # the shape of output is inferred
        >>> y.shape
        (4, 2)
        >>> y.data
        array([[1, 2],
               [3, 4],
               [5, 6],
               [7, 8]])
        >>> y = F.reshape(x, (4, 3))  # input and output shapes are not consistent
        Traceback (most recent call last):
        ...
        chainer.utils.type_check.InvalidType:
        Invalid operation is performed in: Reshape (Forward)
        Expect: prod(in_types[0].shape) == prod((4, 3))
        Actual: 8 != 12

    """
    if x.shape == shape:
        return chainer.as_variable(x)
    y, = Reshape(shape).apply((x,))
    return y
def ceil(x):
    """Elementwise ceil function.

    .. math::
       y_i = \\lceil x_i \\rceil

    Args:
        x (~chainer.Variable): Input variable.

    Returns:
        ~chainer.Variable: Output variable.
    """
    if isinstance(x, chainer.variable.Variable):
        x = x.data
    xp = backend.get_array_module(x)
    return chainer.as_variable(utils.force_array(xp.ceil(x), x.dtype))
def floor(x):
    """Elementwise floor function.

    .. math::
       y_i = \\lfloor x_i \\rfloor

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`): Input variable.

    Returns:
        ~chainer.Variable: Output variable.
    """
    if isinstance(x, chainer.variable.Variable):
        x = x.array
    xp = backend.get_array_module(x)
    return chainer.as_variable(utils.force_array(xp.floor(x), x.dtype))
def floor(x):
    """Elementwise floor function.

    .. math::
       y_i = \\lfloor x_i \\rfloor

    Args:
        x (~chainer.Variable): Input variable.

    Returns:
        ~chainer.Variable: Output variable.
    """
    if isinstance(x, chainer.variable.Variable):
        x = x.data
    xp = cuda.get_array_module(x)
    return chainer.as_variable(utils.force_array(xp.floor(x), x.dtype))
def fix(x):
    """Elementwise fix function (rounds toward zero).

    .. math::
       y_i = \\operatorname{fix}(x_i)

    Args:
        x (~chainer.Variable): Input variable.

    Returns:
        ~chainer.Variable: Output variable.
    """
    if isinstance(x, chainer.variable.Variable):
        x = x.array
    xp = backend.get_array_module(x)
    return chainer.as_variable(utils.force_array(xp.fix(x), x.dtype))
def __init__(self, a, b):
    super(Beta, self).__init__()
    self.__a = chainer.as_variable(a)
    self.__b = chainer.as_variable(b)
def p(self):
    if self.__p is not None:
        return chainer.as_variable(self.__p)
    else:
        return sigmoid.sigmoid(self.logit)
def b(self):
    return chainer.as_variable(self.__b)
def logit(self):
    if self.__logit is not None:
        return chainer.as_variable(self.__logit)
    else:
        return exponential.log(self.p) - logarithm_1p.log1p(-self.p)
def a(self):
    return chainer.as_variable(self.__a)
def __init__(self, mu, sigma):
    self.__mu = chainer.as_variable(mu)
    self.__sigma = chainer.as_variable(sigma)
def loc(self):
    return chainer.as_variable(self.__loc)
def scale(self):
    return chainer.as_variable(self.__scale)
def __init__(self, alpha):
    self.__alpha = chainer.as_variable(alpha)