import numpy as np
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.tensor.shared_randomstreams import RandomStreams
from pylearn2.utils import sharedX


# The `Attention` base class (providing `layer_name`, `input_space`, and
# `pre_alpha`) is assumed to be defined elsewhere in this module.
class HardAttention(Attention):
    def __init__(self, inverse_temperature=1., semi_sampling_p=0.5,
                 use_sampling=True, use_argmax=False, **kwargs):
        super(HardAttention, self).__init__(**kwargs)
        # At most one of the two hard-selection modes may be enabled.
        assert not (use_sampling and use_argmax)
        self.inverse_temperature = sharedX(
            np.float32(inverse_temperature),
            name=self.layer_name + '_temperature_c')
        self.semi_sampling_p = sharedX(
            np.float32(semi_sampling_p),
            name=self.layer_name + '_semi_sampling_p')
        try:
            self.theano_rng = MRG_RandomStreams(123)  # GPU rng
        except Exception:
            self.theano_rng = RandomStreams(123)      # CPU fallback
        self.use_sampling = use_sampling
        self.use_argmax = use_argmax

    def alpha(self, state, pctx):
        # Temperature-scaled attention weights. Softmax (rather than the
        # original sigmoid) is used so each row is a proper distribution
        # over locations, as the multinomial sampler below requires.
        pre_alpha = self.inverse_temperature * self.pre_alpha(state, pctx)
        return T.nnet.softmax(pre_alpha)

    def argmax_alpha_sample(self, state, pctx, context):
        # Deterministic hard attention: a one-hot vector at the argmax.
        alpha = self.alpha(state, pctx)
        alpha_max = T.argmax(alpha, axis=1, keepdims=True)
        return T.cast(T.eq(T.arange(alpha.shape[1])[None, :], alpha_max),
                      'float32')

    def random_alpha_sample(self, state, pctx, context):
        # Stochastic hard attention: draw a one-hot sample from the
        # multinomial defined by alpha. With probability
        # (1 - semi_sampling_p) the soft weights are used instead
        # (semi-sampling).
        alpha = self.alpha(state, pctx)
        alpha_sample = self.theano_rng.multinomial(pvals=alpha,
                                                   dtype='float32')
        semi_mask = self.theano_rng.binomial(
            n=1, p=self.semi_sampling_p, size=(1,)).sum()
        return semi_mask * alpha_sample + (1. - semi_mask) * alpha

    def alpha_sample(self, state, pctx, context):
        self.input_space.components[0].validate(context)
        self.input_space.validate((pctx, state))
        if self.use_sampling:
            return self.random_alpha_sample(state, pctx, context)
        elif self.use_argmax:
            return self.argmax_alpha_sample(state, pctx, context)
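# --- Usage sketch (illustrative only; the tensor names and any constructor
# kwargs beyond those defined above, such as `layer_name`, are assumptions
# about the surrounding codebase, not part of the original class) ---
#
#   attn = HardAttention(inverse_temperature=1.0, semi_sampling_p=0.5,
#                        use_sampling=True, layer_name='hard_attn')
#   # `state` is the decoder state, `pctx` the projected context, and
#   # `context` the raw annotation vectors; the result is a (batch,
#   # n_locations) tensor that is one-hot when a hard sample is drawn.
#   alpha_sample = attn.alpha_sample(state, pctx, context)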