def output(self, x, index_selection_func=None):
    """Compute the sparse block output of the layer for input ``x``.

    ``index_selection_func``, when given, is called with ``self.out_idxs``
    and its result is used as the output-block indices for this call
    (``self.out_idxs`` itself is not reassigned).

    When ``self.l_params`` is set, ``self.W`` and ``self.b`` are each
    multiplied by the matching entry of ``self.l_params`` after it has been
    dimshuffled with the pattern stored in ``self.l_param_map``.

    Returns the ``sparse_block_dot_SS`` result, passed through
    ``self.activation`` when one is configured.
    """
    chosen_idxs = self.out_idxs
    if index_selection_func is not None:
        chosen_idxs = index_selection_func(chosen_idxs)

    if self.l_params is None:
        weights, biases = self.W, self.b
    else:
        # Per-parameter multipliers, dimshuffled so they can be multiplied
        # with W and b respectively.
        w_scale = self.l_params[0].dimshuffle(*self.l_param_map[0])
        b_scale = self.l_params[1].dimshuffle(*self.l_param_map[1])
        weights = w_scale * self.W
        biases = b_scale * self.b

    result = sparse_block_dot_SS(weights, x, self.in_idxs, biases, chosen_idxs)

    if self.activation is None:
        return result
    return self.activation(result)
def h_softmax_gpu(W1, b1, W2, b2, x, n_outputs, n_classes,
                  n_outputs_per_class, batch_size, target=None):
    """
    GPU-only version of a two-layer hierarchical softmax.

    See hierarchical_softmax's docstring for the description of the arguments.

    When ``target`` is None, returns the probabilities of all the outputs,
    reshaped to ``(batch_size, -1)`` and truncated to the first ``n_outputs``
    columns. Otherwise returns, for each example, the probability of the
    output designated by its (flattened) target.

    Outputs are laid out class after class, so output ``t`` belongs to class
    ``t // n_outputs_per_class`` and sits at position
    ``t % n_outputs_per_class`` inside that class.
    """
    W1 = as_cuda_ndarray_variable(W1)
    b1 = as_cuda_ndarray_variable(b1)
    W2 = as_cuda_ndarray_variable(W2)
    b2 = as_cuda_ndarray_variable(b2)
    x = as_cuda_ndarray_variable(x)

    # First softmax which computes the probabilities of belonging to each class
    class_probs = tensor.nnet.softmax(tensor.dot(x, W1) + b1)

    if target is None:
        # Computes the probabilities of all the outputs

        # Every example uses every class: one row of 0..n_classes-1 per example
        class_ids = tensor.tile(
            tensor.arange(n_classes, dtype="int32")[None, :], (batch_size, 1))

        # Second softmax that computes the output probabilities
        activations = sparse_block_dot_SS(
            W2[None, :, :, :], x[:, None, :],
            tensor.zeros((batch_size, 1), dtype='int32'), b2, class_ids)

        output_probs = tensor.nnet.softmax(
            activations.reshape((-1, n_outputs_per_class)))
        output_probs = output_probs.reshape((batch_size, n_classes, -1))
        # Weight each within-class probability by the probability of its class
        output_probs = class_probs[:, :, None] * output_probs
        output_probs = output_probs.reshape((batch_size, -1))
        # Drop the trailing columns beyond the requested number of outputs
        output_probs = output_probs[:, :n_outputs]

    else:
        # Computes the probabilities of the outputs specified by the targets

        # Flattens the targets
        target = target.flatten()

        # Classes to which belong each target
        target_classes = target // n_outputs_per_class

        # Outputs to which belong each target inside a class.
        # The position inside a class is taken modulo the class size (not the
        # number of classes), matching the layout used when target is None.
        target_outputs_in_class = target % n_outputs_per_class

        # Second softmax that computes the output probabilities
        activations = sparse_block_dot_SS(
            W2[None, :, :, :], x[:, None, :],
            tensor.zeros((batch_size, 1), dtype='int32'), b2,
            target_classes[:, None])

        output_probs = tensor.nnet.softmax(activations[:, 0, :])
        target_class_probs = class_probs[tensor.arange(batch_size),
                                         target_classes]
        output_probs = output_probs[tensor.arange(batch_size),
                                    target_outputs_in_class]
        output_probs = target_class_probs * output_probs

    return output_probs
def output(self, x, index_selection_func=None):
    """Compute the layer output on a subset of the output blocks.

    When ``self.n_out > 1`` the output-block indices are recomputed from the
    projection of ``x`` onto ``self.rand_proj_mat`` and stored in
    ``self.out_idxs`` (this method therefore mutates the instance).
    Otherwise the existing ``self.out_idxs`` is used unchanged.

    x: layer input; ``x.shape[0]``, ``x.shape[1]`` and ``x.shape[2]`` are
        read, and the last two are merged before the projection.
    index_selection_func: optional callable applied to the projection in
        place of ``T.argsort``.

    Returns the ``sparse_block_dot_SS`` result, passed through
    ``self.activation`` when one is configured.
    """
    if self.n_out > 1:
        # Merge the second and third dims of x so it can be projected with a
        # single matrix product.
        flat_x = x.reshape((x.shape[0], x.shape[1] * x.shape[2]))
        rnd_proj = T.dot(flat_x, self.rand_proj_mat)

        if index_selection_func is not None:
            self.out_idxs = index_selection_func(rnd_proj)
        else:
            self.out_idxs = T.argsort(rnd_proj)

        # Keep the last k columns of the ordering and sort the kept indices.
        # NOTE(review): the layout of this block was ambiguous; this assumes
        # the slice + sort applies to both branches above, not only to the
        # argsort branch -- confirm against the callers that pass
        # index_selection_func.
        self.out_idxs = T.sort(self.out_idxs[:, -self.k:])

    sparse = sparse_block_dot_SS(
        self.W, x, self.in_idxs, self.b, self.out_idxs)

    return sparse if self.activation is None else self.activation(sparse)
def output(self, x, index_selection_func=None):
    """Return the sparse block product of ``x`` with this layer's parameters.

    index_selection_func: optional callable; when supplied it receives
        ``self.out_idxs`` and returns the output-block indices to use for
        this call.

    If ``self.l_params`` is not None, the weights and biases are first
    multiplied by ``self.l_params[0]`` and ``self.l_params[1]``, each
    dimshuffled according to ``self.l_param_map``.

    The result goes through ``self.activation`` unless that is None.
    """
    idxs = (self.out_idxs if index_selection_func is None
            else index_selection_func(self.out_idxs))

    layer_W, layer_b = self.W, self.b
    if self.l_params is not None:
        layer_W = self.l_params[0].dimshuffle(*self.l_param_map[0]) * layer_W
        layer_b = self.l_params[1].dimshuffle(*self.l_param_map[1]) * layer_b

    pre_activation = sparse_block_dot_SS(
        layer_W, x, self.in_idxs, layer_b, idxs)

    activation = self.activation
    if activation is None:
        return pre_activation
    return activation(pre_activation)
def test_blocksparse():
    """Check sparse_block_dot_SS against the reference ``blocksparse``."""
    bias_var = tensor.fmatrix()
    weight_var = tensor.ftensor4()
    hidden_var = tensor.ftensor3()
    in_idx_var = tensor.lmatrix()
    out_idx_var = tensor.lmatrix()

    # Same argument order for the op and for the compiled function inputs.
    sym_inputs = [weight_var, hidden_var, in_idx_var, bias_var, out_idx_var]
    sym_output = sparse_block_dot_SS(*sym_inputs)
    compiled = theano.function(sym_inputs, sym_output)

    w_data, h_data, in_idx_data, b_data, out_idx_data = blocksparse_data()
    call_args = (w_data, h_data, in_idx_data, b_data, out_idx_data)

    theano_result = compiled(*call_args)
    expected = blocksparse(*call_args)

    utt.assert_allclose(expected, theano_result)
def test_blocksparse():
    """Check sparse_block_dot_SS, compiled with ``mode_with_gpu``, against
    the reference ``blocksparse``."""
    bias_sym = tensor.fmatrix()
    weights_sym = tensor.ftensor4()
    acts_sym = tensor.ftensor3()
    input_idx_sym = tensor.lmatrix()
    output_idx_sym = tensor.lmatrix()

    graph_out = sparse_block_dot_SS(
        weights_sym, acts_sym, input_idx_sym, bias_sym, output_idx_sym)
    gpu_fn = theano.function(
        [weights_sym, acts_sym, input_idx_sym, bias_sym, output_idx_sym],
        graph_out,
        mode=mode_with_gpu)

    weights, acts, input_idx, bias, output_idx = blocksparse_data()

    got = gpu_fn(weights, acts, input_idx, bias, output_idx)
    want = blocksparse(weights, acts, input_idx, bias, output_idx)

    utt.assert_allclose(want, got)
def test_blocksparseF():
    """Check sparse_block_dot_SS when W reaches it through a GpuDimShuffle.

    The weight input is fed with its last two axes swapped and the graph
    swaps them back with ``GpuDimShuffle``, so the result must still match
    the reference ``blocksparse`` computed on the unswapped weights.
    """
    bias_var = tensor.fmatrix()
    weight_var = tensor.ftensor4()
    hidden_var = tensor.ftensor3()
    in_idx_var = tensor.lmatrix()
    out_idx_var = tensor.lmatrix()

    # 4-d, no broadcastable dims; new order exchanges axes 2 and 3.
    swap_last_axes = GpuDimShuffle((False,) * 4, (0, 1, 3, 2))
    swapped_weight = swap_last_axes(as_cuda_ndarray_variable(weight_var))

    sym_output = sparse_block_dot_SS(
        swapped_weight, hidden_var, in_idx_var, bias_var, out_idx_var)
    compiled = theano.function(
        [weight_var, hidden_var, in_idx_var, bias_var, out_idx_var],
        sym_output)

    w_data, h_data, in_idx_data, b_data, out_idx_data = blocksparse_data()

    theano_result = compiled(
        numpy.swapaxes(w_data, 2, 3), h_data, in_idx_data, b_data,
        out_idx_data)
    expected = blocksparse(w_data, h_data, in_idx_data, b_data, out_idx_data)

    utt.assert_allclose(expected, theano_result)
def test_blocksparseF():
    """Check sparse_block_dot_SS on a dimshuffled W, compiled with
    ``mode_with_gpu``.

    The function receives W with axes 2 and 3 exchanged; the graph undoes
    the exchange with ``GpuDimShuffle`` before the block product, and the
    output is compared with the reference ``blocksparse``.
    """
    bias_sym = tensor.fmatrix()
    weights_sym = tensor.ftensor4()
    acts_sym = tensor.ftensor3()
    input_idx_sym = tensor.lmatrix()
    output_idx_sym = tensor.lmatrix()

    no_broadcast = (False, False, False, False)
    axis_order = (0, 1, 3, 2)
    shuffled_weights = GpuDimShuffle(no_broadcast, axis_order)(
        as_cuda_ndarray_variable(weights_sym))

    graph_out = sparse_block_dot_SS(
        shuffled_weights, acts_sym, input_idx_sym, bias_sym, output_idx_sym)
    gpu_fn = theano.function(
        [weights_sym, acts_sym, input_idx_sym, bias_sym, output_idx_sym],
        graph_out,
        mode=mode_with_gpu)

    weights, acts, input_idx, bias, output_idx = blocksparse_data()

    got = gpu_fn(numpy.swapaxes(weights, 2, 3), acts, input_idx, bias,
                 output_idx)
    want = blocksparse(weights, acts, input_idx, bias, output_idx)

    utt.assert_allclose(want, got)
def output(self, x, index_selection_func=None):
    """Return the sparse block output for ``x`` over selected output blocks.

    If ``self.n_out > 1``, ``x`` is projected with ``self.rand_proj_mat``
    and the output-block indices are derived from that projection and
    written to ``self.out_idxs`` (side effect on the instance). If not,
    whatever ``self.out_idxs`` already holds is used.

    x: layer input; its dims 1 and 2 are merged before the projection.
    index_selection_func: optional callable that replaces ``T.argsort`` and
        is given the projection.

    Returns the ``sparse_block_dot_SS`` result, or ``self.activation`` of it
    when an activation is set.
    """
    if self.n_out > 1:
        merged_shape = (x.shape[0], x.shape[1] * x.shape[2])
        projection = T.dot(x.reshape(merged_shape), self.rand_proj_mat)

        ordering = (T.argsort(projection) if index_selection_func is None
                    else index_selection_func(projection))

        # Last k columns of the ordering, then sorted.
        # NOTE(review): assumes the slice + sort is meant for both the
        # argsort result and the index_selection_func result; the original
        # layout did not make the nesting explicit -- please confirm.
        self.out_idxs = T.sort(ordering[:, -self.k:])

    block_out = sparse_block_dot_SS(
        self.W, x, self.in_idxs, self.b, self.out_idxs)

    if self.activation is None:
        return block_out
    return self.activation(block_out)