def step(self, x, states):
    M = states[0]  # (nb_samples, nb_slots, memory_size)
    h = states[1]  # (nb_samples, memory_size)
    w = states[2]  # (nb_samples, nb_slots)
    # ------ Memory read ------ #
    k = self.W_k(h)  # (nb_samples, memory_size)
    w_hat = T.batched_tensordot(M, k, axes=[(2), (1)])  # (nb_samples, nb_slots)
    beta = K.sigmoid(self.W_b(h))  # (nb_samples, 1)
    beta = K.repeat(beta, self.nb_slots)  # (nb_samples, nb_slots, 1)
    beta = K.squeeze(beta, 2)  # (nb_samples, nb_slots)
    w_hat = softmax(w_hat * beta)  # (nb_samples, nb_slots)
    g = sigmoid(self.W_hg(h))  # (nb_samples, 1)
    g = K.repeat(g, self.nb_slots)  # (nb_samples, nb_slots, 1)
    g = K.squeeze(g, 2)  # (nb_samples, nb_slots)
    w = (1 - g) * w + g * w_hat  # (nb_samples, nb_slots)
    c = T.batched_tensordot(w, M, axes=[(1), (1)])  # (nb_samples, memory_size)
    h = tanh(self.W_ih(x) + self.W_c(c))
    y = self.W_ho(h)
    # ------ Memory write ------ #
    v = self.W_v(h)  # (nb_samples, memory_size)
    v = K.repeat(v, 1)  # (nb_samples, 1, memory_size)
    e = sigmoid(self.W_he(h))  # (nb_samples, nb_slots)
    f = 1 - w * e  # (nb_samples, nb_slots)
    f = K.repeat(f, self.memory_size)  # (nb_samples, memory_size, nb_slots)
    f = K.permute_dimensions(f, (0, 2, 1))  # (nb_samples, nb_slots, memory_size)
    u = w  # (nb_samples, nb_slots)
    u = K.repeat(u, 1)  # (nb_samples, 1, nb_slots)
    uv = T.batched_tensordot(u, v, axes=[(1), (1)])  # (nb_samples, nb_slots, memory_size)
    M = M * f + uv
    return y, [M, h, w]
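# A standalone sketch (assuming Theano is installed) of the two read-side
# contractions used in step() above: per-slot content scores M.k, and the
# read vector c = w.M. Names and shapes mirror the comments in step().
import numpy as np
import theano
import theano.tensor as T

M = T.tensor3('M')  # (nb_samples, nb_slots, memory_size)
k = T.matrix('k')   # (nb_samples, memory_size)
w = T.matrix('w')   # (nb_samples, nb_slots)
scores = T.batched_tensordot(M, k, axes=[(2), (1)])  # (nb_samples, nb_slots)
c = T.batched_tensordot(w, M, axes=[(1), (1)])       # (nb_samples, memory_size)
f = theano.function([M, k, w], [scores, c])

m_val = np.random.rand(5, 3, 4).astype(theano.config.floatX)
k_val = np.random.rand(5, 4).astype(theano.config.floatX)
w_val = np.random.rand(5, 3).astype(theano.config.floatX)
s_out, c_out = f(m_val, k_val, w_val)
print(s_out.shape, c_out.shape)  # (5, 3) (5, 4)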
def negFactorization1(self, batchSize, negEmbA, argsEmbB, wC):
    # first = T.tensordot(relationProbs, self.C, axes=[[1], [2]])  # [l,r] * [k,k,r] = [l, k, k]
    Afirst = T.batched_tensordot(wC, negEmbA.dimshuffle(1, 2, 0),
                                 axes=[[1], [1]])  # [l, k, k] * [n, l, k] = [l, k, n]
    Asecond = T.batched_tensordot(Afirst, argsEmbB,
                                  axes=[[1], [1]])  # [l, k, n] * [l, k] = [l, n]
    return Asecond
def __init__(self, intpic_parameters=None, case_costs=None, pics=None,
             case_labels=None, batch_size=None, pic_size=None,
             label_count=None, **kwargs):
    super(IntpicGradientDescent, self).__init__(**kwargs)
    center_val = 0.5
    self.input_pics = pics
    self.case_costs = case_costs
    self.batch_size = batch_size
    self.label_count = label_count
    self.intpic_parameters = intpic_parameters
    self.jacobians = self._compute_jacobians()
    self.gradpics = OrderedDict([
        (param, _create_intpic_histogram_for(param, pic_size, label_count))
        for param in self.intpic_parameters])
    self.intpics = OrderedDict([
        (param, _create_intpic_histogram_for(param, pic_size, label_count))
        for param in self.intpic_parameters])
    # attributes pics: (cases, picy, picx) to (cases, labels, picy, picx)
    # attributed_pics = tensor.batched_tensordot(
    #     tensor.extra_ops.to_one_hot(case_labels.flatten(), label_count),
    #     pics[:, 0, :, :], axes=0)
    zeroed_pics = pics - center_val
    attributed_pics = tensor.batched_tensordot(
        tensor.extra_ops.to_one_hot(case_labels.flatten(), label_count),
        zeroed_pics[:, 0, :, :], axes=0)
    self.gradpic_updates = OrderedDict([
        _create_gradpic_updates(self.gradpics[param],
                                self.jacobians[param],
                                attributed_pics)
        for param in self.intpic_parameters])
    self.add_updates(self.gradpic_updates)
    intensity_pics = (zeroed_pics *
                      gradient.grad(case_costs.mean(), pics))
    attributed_i_pics = tensor.batched_tensordot(
        tensor.extra_ops.to_one_hot(case_labels.flatten(), label_count),
        intensity_pics[:, 0, :, :], axes=0)
    self.intpic_updates = OrderedDict([
        _create_intensity_updates(self.intpics[param],
                                  self.jacobians[param],
                                  attributed_i_pics)
        for param in self.intpic_parameters])
    self.add_updates(self.intpic_updates)
def get_output_for(self, inputs, **flags):
    u, mu, L = inputs
    # batch-wise matrix multiplication: P = L . L^T for each batch item
    P = T.batched_tensordot(L, L.swapaxes(2, 1), axes=[2, 1])
    # (u - mu) . P for each batch item
    diff_times_P = T.batched_tensordot((u - mu), P, axes=[1, 2])
    # A = -0.5 * (u - mu) . P . (u - mu)^T
    A = -0.5 * T.batched_dot(diff_times_P, (u - mu))
    return A
def negRightFactorization(self, batchSize, argsEmbA, negEmbB, wC, wC1, wC2):
    Afirst = T.batched_tensordot(wC, argsEmbA,
                                 axes=[[1], [1]])  # [l, k, k] * [l, k] = [l, k]
    Asecond = T.batched_tensordot(Afirst, negEmbB.dimshuffle(1, 2, 0),
                                  axes=[[1], [1]])  # [l, k] * [l, k, n] = [l, n]
    spFirst = T.batched_dot(wC1, argsEmbA)
    spAsecond = T.batched_tensordot(wC2, negEmbB.dimshuffle(1, 2, 0),
                                    axes=[[1], [1]])  # [l,k] [l,k,n] = [l,n]
    return Asecond + spAsecond + spFirst.reshape((batchSize, 1))
def __init__(self, input, n_in, n_out):
    ## input has shape (batchSize, seqLen, n_in)
    ## input shall be a binary tensor, each row has only one 1
    self.n_in = n_in
    self.n_out = n_out
    self.input = input

    value_bound = np.sqrt(6. / (n_in * n_in + n_out))
    W_values = np.asarray(
        np.random.uniform(low=-value_bound, high=value_bound,
                          size=(n_in, n_in, n_out)),
        dtype=theano.config.floatX)
    self.W = theano.shared(value=W_values, name='EmbeddingLayer_W',
                           borrow=True)

    ## out1 shall have shape (batchSize, seqLen, n_in, n_out)
    out1 = T.tensordot(input, self.W, axes=1)
    ## out2 has shape (batchSize, n_out, seqLen, n_in)
    out2 = out1.dimshuffle(0, 3, 1, 2)
    ## input2 has shape (batchSize, n_in, seqLen)
    input2 = input.dimshuffle(0, 2, 1)
    ## out3 shall have shape (batchSize, n_out, seqLen, seqLen)
    out3 = T.batched_tensordot(out2, input2, axes=1)
    ## output has shape (batchSize, seqLen, seqLen, n_out)
    self.output = out3.dimshuffle(0, 2, 3, 1)

    self.params = [self.W]
    self.paramL1 = abs(self.W).sum()
    self.paramL2 = (self.W**2).sum()
    ## self.pcenters = (self.W.sum(axis=[0, 1])**2).sum()
    self.pcenters = (self.W.mean(axis=[0, 1])**2).sum()
def get_output_for(self, inputs, **flags):
    u, mu, L = inputs
    # batch-wise matrix multiplication: P = L . L^T for each batch item
    P = T.batched_tensordot(L, L.swapaxes(2, 1), axes=[2, 1])
    # (u - mu) . P for each batch item
    diff_times_P = T.batched_tensordot((u - mu), P, axes=[1, 2])
    # A = -0.5 * (u - mu) . P . (u - mu)^T
    A = -0.5 * T.batched_dot(diff_times_P, (u - mu))[:, None]  # shape = (None, 1)
    assert A.ndim == 2
    return A
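# Numeric sanity check (a sketch, assuming Theano is installed) for the
# quadratic term above: for each batch item it should equal
# -0.5 * (u - mu)' (L L') (u - mu).
import numpy as np
import theano
import theano.tensor as T

u = T.matrix('u')
mu = T.matrix('mu')
L = T.tensor3('L')
P = T.batched_tensordot(L, L.swapaxes(2, 1), axes=[2, 1])
diff_times_P = T.batched_tensordot(u - mu, P, axes=[1, 2])
A = -0.5 * T.batched_dot(diff_times_P, u - mu)
f = theano.function([u, mu, L], A)

rng = np.random.RandomState(0)
u_v = rng.rand(4, 3).astype(theano.config.floatX)
mu_v = rng.rand(4, 3).astype(theano.config.floatX)
L_v = rng.rand(4, 3, 3).astype(theano.config.floatX)
ref = np.array([-0.5 * (u_v[i] - mu_v[i]).dot(L_v[i].dot(L_v[i].T)).dot(u_v[i] - mu_v[i])
                for i in range(4)])
print(np.allclose(f(u_v, mu_v, L_v), ref, atol=1e-4))  # True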
def get_output_for(self, input, **kwargs):
    _pred = T.batched_tensordot(
        input.flatten(ndim=3).dimshuffle((2, 0, 1)),
        self.W.flatten(ndim=2).dimshuffle((1, 0)),
        axes=[[2], [1]]).dimshuffle((1, 0)).reshape(
            (input.shape[0], self.nv, self.nt))
    if self.b is not None:
        _pred = _pred + self.b
    return self.nonlinearity(_pred)
def get_output_for(self, input, **kwargs):
    _pred = T.batched_tensordot(
        input.dimshuffle((2, 0, 1)),
        self.W.dimshuffle((2, 0, 1)),
        axes=[[2], [1]]).dimshuffle((1, 2, 0))
    if self.b is not None:
        _pred = _pred + self.b
    return self.nonlinearity(_pred)
def batch_dot(x, y, axes=None):
    '''batchwise dot product

    batch_dot results in a tensor with less dimensions than the input.
    If the number of dimensions is reduced to 1, we use `expand_dims`
    to make sure that ndim is at least 2.

    # Example
        Assume x = [[1, 2], [3, 4]] and y = [[5, 6], [7, 8]]
        batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal
        of x.dot(y.T), although we never have to calculate the off-diagonal
        elements.

    # Arguments
        x, y: tensors with ndim >= 2
        axes: list (or single) int with target dimensions

    # Returns
        Tensor with ndim >= 2
    '''
    if type(axes) == int:
        axes = (axes, axes)
    if axes is None:
        # behaves like tf.batch_matmul as default
        axes = [x.ndim - 1, y.ndim - 2]
    out = T.batched_tensordot(x, y, axes=axes)
    if ndim(out) == 1:
        out = expand_dims(out, 1)
    return out
def factorization(self, batchSize, argsEmbA, argsEmbB, wC):
    # first = T.tensordot(relationProbs, self.C, axes=[[1], [2]])  # [l,r] * [k,k,r] = [l, k, k]
    Afirst = T.batched_tensordot(wC, argsEmbA,
                                 axes=[[1], [1]])  # [l, k, k] * [l, k] = [l, k]
    Asecond = T.batched_dot(Afirst, argsEmbB)  # [l, k] * [l, k] = [l]
    # entropy = T.sum(T.log(relationProbs) * relationProbs, axis=1)  # [l,r] * [l,r] = [l]
    return Asecond
def negRightMostFactorization(self, batchSize, negEmbed, wC2):
    # l = batchSize
    # k = self.k  # embed size
    # r = self.r  # relation number
    # second = T.dot(relationProbs, self.C2.dimshuffle(1, 0))  # [l,r] * [r,k] = [l, k]
    Asecond = T.batched_tensordot(wC2, negEmbed.dimshuffle(1, 2, 0),
                                  axes=[[1], [1]])  # [l,k] [l,k,n] = [l,n]
    return Asecond
def negLeftMostFactorization(self, batchSize, negEmbed, wC1):
    # l = batchSize
    # k = self.k  # embed size
    # r = self.r  # relation number
    # first = T.dot(relationProbs, self.C1.dimshuffle(1, 0))  # [l,r] * [r,k] = [l, k]
    Afirst = T.batched_tensordot(wC1, negEmbed.dimshuffle(1, 2, 0),
                                 axes=[[1], [1]])  # [l,k] [l,k,n] = [l,n]
    return Afirst
def negLeftFactorization(self, batchSize, negEmbA, argsEmbB, wC, wC1, wC2):
    # l = batchSize
    # k = self.k  # embed size
    # r = self.r  # relation number
    # argEmbedsA = self.A[argsA.flatten()]  # [l,k]
    # argEmbedsB = self.A[argsB.flatten()]  # [l,k]
    # first = T.tensordot(relationProbs, self.C, axes=[[1], [2]])  # [l,r] * [k,k,r] = [l, k, k]
    Afirst = T.batched_tensordot(wC, negEmbA.dimshuffle(1, 2, 0),
                                 axes=[[1], [1]])  # [l, k, k] * [n, l, k] = [l, k, n]
    Asecond = T.batched_tensordot(Afirst, argsEmbB,
                                  axes=[[1], [1]])  # [l, k, n] * [l, k] = [l, n]
    # spFirst = T.dot(relationProbs, self.C1.dimshuffle(1, 0))  # [l,r] * [r,k] = [l, k]
    spAfirst = T.batched_tensordot(wC1, negEmbA.dimshuffle(1, 2, 0),
                                   axes=[[1], [1]])  # [l,k] [l,k,n] = [l,n]
    spSecond = T.batched_dot(wC2, argsEmbB)
    return Asecond + spAfirst + spSecond.reshape((batchSize, 1))
def factorization(self, batchSize, argsEmbA, argsEmbB, wC):
    # first = T.tensordot(relationProbs, self.C, axes=[[1], [2]])  # [l,r] * [k,k,r] = [l, k, k]
    # http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.batched_tensordot
    Afirst = T.batched_tensordot(wC, argsEmbA,
                                 axes=[[1], [1]])  # [l, k, k] * [l, k] = [l, k]
    Asecond = T.batched_dot(Afirst, argsEmbB)  # [l, k] * [l, k] = [l]
    # entropy = T.sum(T.log(relationProbs) * relationProbs, axis=1)  # [l,r] * [l,r] = [l]
    return Asecond
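# Shape walk-through (a sketch, assuming Theano is installed): wC holds a
# per-example k x k relation matrix, so the score above is the bilinear form
# a' C b computed per example. The names here are illustrative stand-ins.
import numpy as np
import theano
import theano.tensor as T

wC = T.tensor3('wC')  # [l, k, k]
a = T.matrix('a')     # [l, k]
b = T.matrix('b')     # [l, k]
Afirst = T.batched_tensordot(wC, a, axes=[[1], [1]])  # [l, k]
score = T.batched_dot(Afirst, b)                      # [l]
f = theano.function([wC, a, b], score)

C_v = np.random.rand(3, 4, 4).astype(theano.config.floatX)
a_v = np.random.rand(3, 4).astype(theano.config.floatX)
b_v = np.random.rand(3, 4).astype(theano.config.floatX)
ref = np.einsum('lij,li,lj->l', C_v, a_v, b_v)
print(np.allclose(f(C_v, a_v, b_v), ref, atol=1e-4))  # True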
def negRightMostFactorization(self, batchSize, negEmbed, wC2):
    # l = batchSize
    # k = self.k  # embed size
    # r = self.r  # relation number
    Asecond = T.batched_tensordot(wC2, negEmbed.dimshuffle(1, 2, 0),
                                  axes=[[1], [1]])  # [l,k] [l,k,n] = [l,n]
    return Asecond
def f_inner(cross_beamShape_r, cross_beamShape_i, baseline_x, baseline_y, xk, yk):
    phase = TT.mul(TT.shape_padleft(xk, n_ones=3),
                   TT.shape_padright(baseline_x, n_ones=1)) + \
            TT.mul(TT.shape_padleft(yk, n_ones=3),
                   TT.shape_padright(baseline_y, n_ones=1))
    cos_phase, sin_phase = TT.cos(phase), TT.sin(phase)
    beamforming_weight_r = \
        TT.batched_tensordot(cos_phase, cross_beamShape_r, axes=[[1, 2], [1, 2]]) + \
        TT.batched_tensordot(sin_phase, cross_beamShape_i, axes=[[1, 2], [1, 2]])
    beamforming_weight_i = \
        TT.batched_tensordot(cos_phase, cross_beamShape_i, axes=[[1, 2], [1, 2]]) - \
        TT.batched_tensordot(sin_phase, cross_beamShape_r, axes=[[1, 2], [1, 2]])
    return beamforming_weight_r, beamforming_weight_i
def theano_dot_last_dimension_matrices(x, y):
    if x.ndim == 3 and y.ndim == 3:
        if ("theano" in str(type(x)) and x.broadcastable[0] == False) \
                or ("numpy" in str(type(x)) and x.shape[0] != 1):
            return T.batched_dot(x, y)
        else:
            return T.tensordot(x[0, :, :], y, axes=[[1], [1]])
    else:
        return T.batched_tensordot(x, y, axes=[(x.ndim - 1,), (y.ndim - 2,)])
def attention(self, m, q, mask):
    # mask original shape is (batch * memory_length, input_length, 1)
    # shape: (batch, memory_length)
    mask = K.reshape(mask[:, 0], (-1, self.memory_length))
    # shape: (batch, memory_length, 1)
    p = T.batched_tensordot(m, q, (2, 2))
    # shape: (batch, memory_length)
    p = K.softmax(p[:, :, 0])  # * K.cast(mask, 'float32')
    # shape: (batch, 1, memory_length)
    return K.expand_dims(p, dim=1)
def batch_matmul(x, y, adj_x=False, adj_y=False):
    # used for tensors, i.e. if x and y are matrices this is plain matrix
    # multiplication, otherwise a batched matmul
    # TODO: interfaces with tensorflow backend
    axes = [x.ndim - 1, y.ndim - 2]  # must be a list: tuples do not support item assignment
    if adj_x:
        axes[0] = x.ndim - 2
    if adj_y:
        axes[1] = y.ndim - 1
    return T.batched_tensordot(x, y, axes=axes)
def __init__(self, linp, rinp, n_in, n_out):
    self.W = theano.shared(
        value=np.zeros((n_in, n_out), dtype=theano.config.floatX), name='W')
    self.b = theano.shared(value=np.cast[theano.config.floatX](0.), name='b')
    self.scores = T.batched_tensordot(
        T.dot(linp, self.W), rinp, [[1], [2]]) + self.b
    self.l2_norm = T.sum(self.W**2)
    self.params = [self.W, self.b]
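# Usage sketch for the bilinear scorer above (assuming Theano is installed;
# the shapes are inferred from the contraction): with linp of shape
# (batch, n_in) and rinp of shape (batch, m, n_out), T.dot(linp, W) gives
# (batch, n_out) and the batched contraction scores all m right-hand
# candidates at once, giving (batch, m).
import numpy as np
import theano
import theano.tensor as T

linp = T.matrix('linp')
rinp = T.tensor3('rinp')
W = theano.shared(np.random.rand(5, 7).astype(theano.config.floatX))
scores = T.batched_tensordot(T.dot(linp, W), rinp, [[1], [2]])
f = theano.function([linp, rinp], scores)
print(f(np.random.rand(2, 5).astype(theano.config.floatX),
        np.random.rand(2, 4, 7).astype(theano.config.floatX)).shape)  # (2, 4)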
def get_output_for(self, inputs, **kwargs): """ :param inputs: inputs: list of theano.TensorType `inputs[0]` should always be the symbolic input variable. When this layer has a mask input (i.e. was instantiated with `mask_input != None`, indicating that the lengths of sequences in each batch vary), `inputs` should have length 2, where `inputs[1]` is the `mask`. The `mask` should be supplied as a Theano variable denoting whether each time step in each sequence in the batch is part of the sequence or not. `mask` should be a matrix of shape ``(n_batch, n_time_steps)`` where ``mask[i, j] = 1`` when ``j <= (length of sequence i)`` and ``mask[i, j] = 0`` when ``j > (length of sequence i)``. :return: theano.TensorType Symbolic output variable. """ input = inputs[0] mask = None if self.mask_incoming_index > 0: mask = inputs[self.mask_incoming_index] # compute the bi-affine part # first via tensor dot ([batch, length, dim] * [dim, dim, num_label]) # output shape = [batch, length, dim, num_label] out = T.tensordot(input, self.U, axes=[[2], [0]]) # second via tensor dot ([batch, length, dim, num_label] * [batch, dim, length) # output shape = [batch, length, length, num_label] out = T.batched_tensordot(out, input.dimshuffle(0, 2, 1), axes=([2], [1])) out = out.dimshuffle(0, 1, 3, 2) # compute head bias part by tensor dot ([batch, length, dim] * [dim, num_label]) # the shape of s_h should be [batch, length, num_label] if self.W_h is not None: s_h = T.tensordot(input, self.W_h, axes=[[2], [0]]) out = out + s_h.dimshuffle(0, 1, 'x', 2) # compute child part by tensor dot ([batch, length, dim] * [dim, num_label] # the shape of s_c should be [batch, length, num_label] if self.W_c is not None: s_c = T.tensordot(input, self.W_c, axes=[[2], [0]]) out = out + s_c.dimshuffle(0, 'x', 1, 2) # add bias part. if self.b is not None: out = out + self.b.dimshuffle('x', 'x', 'x', 0) if mask is not None: mask_shuffled = mask.dimshuffle(0, 1, 'x', 'x') out = out * mask_shuffled mask_shuffled = mask.dimshuffle(0, 'x', 1, 'x') out = out * mask_shuffled return out
def negLeftFactorization(self, batchSize, negEmbA, argsEmbB, wC, wC1, wC2):
    Afirst = T.batched_tensordot(wC, negEmbA.dimshuffle(1, 2, 0),
                                 axes=[[1], [1]])  # [l, k, k] * [n, l, k] = [l, k, n]
    Asecond = T.batched_tensordot(Afirst, argsEmbB,
                                  axes=[[1], [1]])  # [l, k, n] * [l, k] = [l, n]
    spAfirst = T.batched_tensordot(wC1, negEmbA.dimshuffle(1, 2, 0),
                                   axes=[[1], [1]])  # [l,k] [l,k,n] = [l,n]
    spSecond = T.batched_dot(wC2, argsEmbB)
    return Asecond + spAfirst + spSecond.reshape((batchSize, 1))
def __init__(self, input, n_in, n_out, box=None):
    ## input has shape (batchSize, seqLen, n_in)
    ## input shall be a binary tensor, each row has only one 1
    self.n_in = n_in
    self.n_out = n_out
    self.input = input

    if box is not None:
        top = box[0]
        left = box[1]
        bottom = box[2]
        right = box[3]
    else:
        top = 0
        left = 0
        bottom = input.shape[1]
        right = input.shape[1]

    value_bound = np.sqrt(6. / (n_in * n_in + n_out))
    W_values = np.asarray(
        np.random.uniform(low=-value_bound, high=value_bound,
                          size=(n_in, n_in, n_out)),
        dtype=theano.config.floatX)
    self.W = theano.shared(value=W_values, name='EmbeddingLayer_W',
                           borrow=True)

    ## out1 shall have shape (batchSize, bottom-top, n_in, n_out)
    if box is None:
        out1 = T.tensordot(input, self.W, axes=1)
    else:
        out1 = T.tensordot(input[:, top:bottom, :], self.W, axes=1)
    ## out2 has shape (batchSize, n_out, bottom-top, n_in)
    out2 = out1.dimshuffle(0, 3, 1, 2)
    ## input2 has shape (batchSize, n_in, right-left)
    if box is None:
        input2 = input.dimshuffle(0, 2, 1)
    else:
        input2 = input.dimshuffle(0, 2, 1)[:, :, left:right]
    ## out3 shall have shape (batchSize, n_out, bottom-top, right-left)
    out3 = T.batched_tensordot(out2, input2, axes=1)
    ## output has shape (batchSize, bottom-top, right-left, n_out)
    self.output = out3.dimshuffle(0, 2, 3, 1)

    self.params = [self.W]
    self.paramL1 = abs(self.W).sum()
    self.paramL2 = (self.W**2).sum()
    ## self.pcenters = (self.W.sum(axis=[0, 1])**2).sum()
    self.pcenters = (self.W.mean(axis=[0, 1])**2).sum()
def ortho_res(z):
    s = 0
    for x in z:
        # compare with '==', not 'is': identity checks on strings are unreliable
        if x.name[-1] == 'W' and x.ndim == 4:
            y = T.batched_tensordot(x, x.dimshuffle(0, 1, 3, 2), [[1, 3], [1, 2]])
            y -= T.eye(x.shape[2], x.shape[3]).dimshuffle('x', 0, 1).repeat(x.shape[0], 0)
            s += T.sum(T.abs_(y))
    return s
def factorization(self, batchSize, argsEmbA, argsEmbB, wC, wC1, wC2):
    # l = batchSize
    # k = self.k  # embed size
    # r = self.r  # relation number
    Afirst = T.batched_tensordot(wC, argsEmbA,
                                 axes=[[1], [1]])  # + self.Cb  # [l, k, k] * [l, k] = [l, k]
    Asecond = T.batched_dot(Afirst, argsEmbB)  # [l, k] * [l, k] = [l]
    spFirst = T.batched_dot(wC1, argsEmbA)
    spSecond = T.batched_dot(wC2, argsEmbB)
    return Asecond + spFirst + spSecond
def compute_grad_term():
    """
    TODO: This implementation is way too slow.
    THIS IS THE MOST IMPORTANT THING TO BE DONE
    """
    self.grad_AA_NTxdxdxd = covariance_time_series_grad(
        self.AA, self.Y, self.X, self.xDim)
    self.grad_BB_NTm1xdxdxd = covariance_time_series_grad(
        self.BB, self.Y, self.X, self.xDim, remove_from_tail=1)
    # gradA_func = theano.function([self.X, self.Y], self.grad_AA_NTxdxdxd)
    # assert False
    test_AA = T.batched_tensordot(
        Xflat,
        T.batched_tensordot(self.grad_AA_NTxdxdxd, Xflat, axes=[[3], [1]]),
        axes=[[1], [2]]).reshape([self.Nsamps, self.Tbins, self.xDim])
    # test_AA_func = theano.function([self.X, self.Y], test_AA)
    test_BB = T.batched_tensordot(
        Xflat01,
        T.batched_tensordot(self.grad_BB_NTm1xdxdxd, Xflat10, axes=[[3], [1]]),
        axes=[[1], [2]])
    test_BB = T.concatenate(
        [test_BB.reshape([self.Nsamps, self.Tbins - 1, self.xDim]),
         T.zeros([self.Nsamps, 1, self.xDim])], axis=1)
    final_term = -0.5 * test_AA - test_BB
    return final_term
def test_vector_matrix():
    x = T.matrix("x", dtype='float32')
    y = T.tensor3("y", dtype='float32')
    vars = [x, y]
    f0 = theano.function(vars, T.sum(x[:, :, None] * y[:, :, :], axis=-2))
    f1 = theano.function(vars, T.batched_dot(x[:, None, :], y)[:, 0, :])
    f2 = theano.function(
        vars, T.batched_tensordot(x, y, axes=[(x.ndim - 1,), (y.ndim - 2,)]))
    fs = [f0, f1, f2]
    test(vars, fs)
def factorization(self, batchSize, argsEmbA, argsEmbB, wC, wC1, wC2):
    # l = batchSize
    # k = self.k  # embed size
    # r = self.r  # relation number
    # argEmbedsA = self.A[argsA.flatten()]  # [l,k]
    # argEmbedsB = self.A[argsB.flatten()]  # [l,k]
    # first = T.tensordot(relationProbs, self.C, axes=[[1], [2]])  # [l,r] * [k,k,r] = [l, k, k]
    Afirst = T.batched_tensordot(wC, argsEmbA,
                                 axes=[[1], [1]])  # + self.Cb  # [l, k, k] * [l, k] = [l, k]
    Asecond = T.batched_dot(Afirst, argsEmbB)  # [l, k] * [l, k] = [l]
    # entropy = T.sum(T.log(relationProbs) * relationProbs, axis=1)  # [l,r] * [l,r] = [l]
    spFirst = T.batched_dot(wC1, argsEmbA)
    spSecond = T.batched_dot(wC2, argsEmbB)
    return Asecond + spFirst + spSecond
def get_output_for(self, inputs, **kwargs):
    batch_size, _, image_height, image_width = self.input_shapes[1]
    window_height, window_width = self.window_shape
    center_x, center_y, width_x, width_y, logsigma2, gamma = \
        [inputs[0][:, j] for j in xrange(6)]
    Fx = filterbank(center_x, width_x, logsigma2,
                    shape=(batch_size, window_width, image_width))
    Fy = filterbank(center_y, width_y, logsigma2,
                    shape=(batch_size, window_height, image_height))
    gamma = T.patternbroadcast(T.reshape(gamma, (batch_size, 1, 1, 1)),
                               [False, True, True, True])
    _tmp = T.batched_tensordot(inputs[1], Fy, [2, 2])
    _tmp = T.batched_tensordot(_tmp, Fx, [2, 2])
    return _tmp * gamma
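# The core crop operation above, isolated (a sketch, assuming Theano is
# installed; img, Fy, Fx are illustrative stand-ins for inputs[1] and the
# filterbank outputs): with images (batch, channels, H, W),
# Fy (batch, window_h, H) and Fx (batch, window_w, W), the two batched
# contractions compute Fy . image . Fx^T per batch item and channel,
# i.e. a (window_h, window_w) patch.
import numpy as np
import theano
import theano.tensor as T

img = T.tensor4('img')
Fy = T.tensor3('Fy')
Fx = T.tensor3('Fx')
t = T.batched_tensordot(img, Fy, [2, 2])  # (batch, channels, W, window_h)
t = T.batched_tensordot(t, Fx, [2, 2])    # (batch, channels, window_h, window_w)
f = theano.function([img, Fy, Fx], t)
out = f(np.random.rand(2, 3, 32, 32).astype(theano.config.floatX),
        np.random.rand(2, 8, 32).astype(theano.config.floatX),
        np.random.rand(2, 10, 32).astype(theano.config.floatX))
print(out.shape)  # (2, 3, 8, 10)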
def theano_dot_last_dimension_vector_matrix(x, y):
    if x.ndim == 2 and y.ndim == 3:
        if ("theano" in str(type(x)) and x.broadcastable[0] == False) \
                or ("numpy" in str(type(x)) and x.shape[0] != 1):
            return T.batched_dot(x[:, None, :], y)[:, 0, :]
        else:
            return T.tensordot(x[0, :], y, axes=[[0], [1]])
    elif x.ndim == 2:
        return T.batched_tensordot(x, y, axes=[(x.ndim - 1,), (y.ndim - 2,)])
    elif x.ndim == y.ndim - 1:
        idcs = [slice(None)] * x.ndim + [None]
        return T.sum(x[idcs] * y, axis=x.ndim - 1)
    else:
        print "->", x.ndim, y.ndim
        raise NotImplementedError()
def make_batched_regression(_mst_data, nf, nv, nc, add_bias=True):
    _W = theano.shared(np.zeros(shape=(nv, nc, nf), dtype=fpX))
    ### place voxel-candidate as the first dimension to be batched over
    _pred = T.batched_tensordot(
        _mst_data.flatten(ndim=3).dimshuffle((2, 0, 1)),
        _W.reshape((nv * nc, nf)),
        axes=[[2], [1]]).dimshuffle((1, 0)).reshape(
            (_mst_data.shape[0], nv, nc))
    params = [_W, ]
    if add_bias:
        _b = theano.shared(np.zeros(shape=(1, nv, nc), dtype=fpX))
        _pred = _pred + T.patternbroadcast(_b, (True, False, False))
        params += [_b, ]
    return _pred, params
def output(self, input_vectors, input_scalars):
    """
    Calculate the n_output transformed vectors for this layer.

    @param input_scalars: n_input x n_output scalar vector
    @param input_vectors: n_input vectors (actual shape should be
        (n_batch, n_input, n_dimension))
    """
    # n_batch was previously undefined here; taking it from the input's
    # leading axis matches the documented shape
    n_batch = input_vectors.shape[0]
    mat = input_scalars.reshape((n_batch, self.n_input, self.n_output))
    z = T.batched_tensordot(input_vectors, mat, [[1], [1]]).swapaxes(1, 2) \
        + T.addbroadcast(self.b, 0, 2)
    if self.activation == 'linear':
        return z
    elif self.activation == 'rectified':
        return T.maximum(z, 0)
    elif self.activation == 'tanh':
        return T.tanh(z)
    else:
        # raising a bare string is invalid Python; raise an exception instead
        raise ValueError("Unknown activation, %s" % self.activation)
def get_output_for(self, inputs, **kwargs):
    Q = inputs[0]
    A = inputs[1]
    QU = T.tensordot(Q, self.U, axes=[2, 1])  # (BSIZE, dim1, NROW, DIM)
    QUA = T.batched_tensordot(QU, A, axes=[3, 2]).dimshuffle(0, 2, 1, 3)
    G = T.tanh(QUA)  # (BSIZE, NROW, dim1, dim2)
    if self.have_mask:
        Qmask = inputs[2]
        Amask = inputs[3]
        Gmask = T.batched_dot(
            Qmask.dimshuffle(0, 1, 'x'),
            Amask.dimshuffle(0, 'x', 1)).dimshuffle(0, 'x', 1, 2)
        G = G * Gmask - (1 - Gmask)  # pad -1 to trailing spaces
    return G
def batch_dot(x, y, axes=None):
    '''Batchwise dot product.

    batch_dot results in a tensor with less dimensions than the input.
    If the number of dimensions is reduced to 1, we use `expand_dims`
    to make sure that ndim is at least 2.

    # Arguments
        x, y: tensors with ndim >= 2
        axes: list (or single) int with target dimensions

    # Returns
        A tensor with shape equal to the concatenation of x's shape
        (less the dimension that was summed over) and y's shape
        (less the batch dimension and the dimension that was summed over).
        If the final rank is 1, we reshape it to (batch_size, 1).

    # Examples
        Assume x = [[1, 2], [3, 4]] and y = [[5, 6], [7, 8]]
        batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal
        of x.dot(y.T), although we never have to calculate the off-diagonal
        elements.

        Shape inference:
        Let x's shape be (100, 20) and y's shape be (100, 30, 20).
        If dot_axes is (1, 2), to find the output shape of resultant tensor,
        loop through each dimension in x's shape and y's shape:
        x.shape[0] : 100 : append to output shape
        x.shape[1] : 20 : do not append to output shape,
            dimension 1 of x has been summed over. (dot_axes[0] = 1)
        y.shape[0] : 100 : do not append to output shape,
            always ignore first dimension of y
        y.shape[1] : 30 : append to output shape
        y.shape[2] : 20 : do not append to output shape,
            dimension 2 of y has been summed over. (dot_axes[1] = 2)
        output_shape = (100, 30)
    '''
    if type(axes) == int:
        axes = (axes, axes)
    if axes is None:
        # behaves like tf.batch_matmul as default
        axes = [x.ndim - 1, y.ndim - 2]
    out = T.batched_tensordot(x, y, axes=axes)
    if ndim(out) == 1:
        out = expand_dims(out, 1)
    return out
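# A quick check of the shape-inference example in the docstring above,
# using the underlying batched_tensordot call directly (a sketch, assuming
# Theano is installed; batch_dot's ndim/expand_dims helpers live elsewhere
# in the backend module):
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')   # will be fed shape (100, 20)
y = T.tensor3('y')  # will be fed shape (100, 30, 20)
f = theano.function([x, y], T.batched_tensordot(x, y, axes=[[1], [2]]))
res = f(np.ones((100, 20), dtype=theano.config.floatX),
        np.ones((100, 30, 20), dtype=theano.config.floatX))
print(res.shape)  # (100, 30), as derived in the docstring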
def get_output(self, train=False):
    if self.mode == 'sum' or self.mode == 'ave':
        s = self.layers[0].get_output(train)
        for i in range(1, len(self.layers)):
            s += self.layers[i].get_output(train)
        if self.mode == 'ave':
            s /= len(self.layers)
        return s
    elif self.mode == 'concat':
        inputs = [self.layers[i].get_output(train)
                  for i in range(len(self.layers))]
        return T.concatenate(inputs, axis=self.concat_axis)
    elif self.mode == 'join':
        inputs = OrderedDict()
        for i in range(len(self.layers)):
            X = self.layers[i].get_output(train)
            if X.name is None:
                raise ValueError("merge_mode='join' only works with named inputs")
            else:
                inputs[X.name] = X
        return inputs
    elif self.mode == 'mul':
        s = self.layers[0].get_output(train)
        for i in range(1, len(self.layers)):
            s *= self.layers[i].get_output(train)
        return s
    elif self.mode == 'dot':
        l1 = self.layers[0].get_output(train)
        l2 = self.layers[1].get_output(train)
        output = T.batched_tensordot(l1, l2, self.dot_axes)
        output_shape = list(self.output_shape)
        output_shape[0] = l1.shape[0]
        output = output.reshape(tuple(output_shape))
        return output
    elif self.mode == 'cos':
        l1 = self.layers[0].get_output(train)
        l2 = self.layers[1].get_output(train)
        output, _ = theano.scan(
            lambda v1, v2: T.dot(v1, v2) / T.sqrt(T.dot(v1, v1) * T.dot(v2, v2)),
            sequences=[l1, l2],
            outputs_info=None)
        return output
    else:
        raise Exception('Unknown merge mode')
def get_output(self, train=False):
    if self.mode == 'sum' or self.mode == 'ave':
        s = self.layers[0].get_output(train)
        for i in range(1, len(self.layers)):
            s += self.layers[i].get_output(train)
        if self.mode == 'ave':
            s /= len(self.layers)
        return s
    elif self.mode == 'concat':
        inputs = [self.layers[i].get_output(train)
                  for i in range(len(self.layers))]
        return T.concatenate(inputs, axis=self.concat_axis)
    elif self.mode == 'join':
        inputs = OrderedDict()
        for i in range(len(self.layers)):
            X = self.layers[i].get_output(train)
            if X.name is None:
                raise ValueError("merge_mode='join' only works with named inputs")
            else:
                inputs[X.name] = X
        return inputs
    elif self.mode == 'mul':
        s = self.layers[0].get_output(train)
        for i in range(1, len(self.layers)):
            s *= self.layers[i].get_output(train)
        return s
    elif self.mode == 'dot':
        l1 = self.layers[0].get_output(train)
        l2 = self.layers[1].get_output(train)
        output = T.batched_tensordot(l1, l2, self.dot_axes)
        output_shape = list(self.output_shape)
        output_shape[0] = l1.shape[0]
        output = output.reshape(tuple(output_shape))
        return output
    elif self.mode == 'inner':
        l1 = self.layers[0].get_output(train)
        l2 = self.layers[1].get_output(train)
        output = T.sum(l1 * l2, axis=-1)
        return output
    elif self.mode == 'cos':
        l1 = self.layers[0].get_output(train)
        l2 = self.layers[1].get_output(train)
        output, _ = theano.scan(
            lambda v1, v2: T.dot(v1, v2) / T.sqrt(T.dot(v1, v1) * T.dot(v2, v2)),
            sequences=[l1, l2],
            outputs_info=None)
        return output
    else:
        raise Exception('Unknown merge mode')
def weighted_average(inp, weights, axis=None):
    # inp: n_b x n_s x 4 x n_w_a
    if axis == 2:
        # for question
        weights = weights.flatten(ndim=2)
        weights /= T.sum(weights, axis=1, keepdims=True) + 0.000001
        return T.batched_tensordot(inp, weights, [[inp.ndim - 1], [1]])
    elif axis == 3:
        # for answer; inp: (None, 51, 4, 20), output: (None, 4, 20, 1)
        weights = weights.flatten(ndim=weights.ndim - 1)
        weights /= T.sum(weights, axis=weights.ndim - 1, keepdims=True) + 0.000001
        weights = weights.dimshuffle(0, 'x', 1, 2)
        return T.sum(inp * weights, axis=3)
    elif axis == 4:
        # for inner sliding window
        weights = weights.flatten(ndim=weights.ndim - 1)
        weights /= T.sum(weights, axis=weights.ndim - 1, keepdims=True) + 0.000001
        weights = weights.dimshuffle(0, 'x', 'x', 1, 2)
        return T.sum(inp * weights, axis=4)
    else:
        raise RuntimeError
def get_output(self, train=False): if self.mode == "sum" or self.mode == "ave": s = self.layers[0].get_output(train) for i in range(1, len(self.layers)): s += self.layers[i].get_output(train) if self.mode == "ave": s /= len(self.layers) return s elif self.mode == "concat": inputs = [self.layers[i].get_output(train) for i in range(len(self.layers))] return T.concatenate(inputs, axis=self.concat_axis) elif self.mode == "join": inputs = OrderedDict() for i in range(len(self.layers)): X = self.layers[i].get_output(train) if X.name is None: raise ValueError("merge_mode='join' only works with named inputs") else: inputs[X.name] = X return inputs elif self.mode == "mul": s = self.layers[0].get_output(train) for i in range(1, len(self.layers)): s *= self.layers[i].get_output(train) return s elif self.mode == "dot": l1 = self.layers[0].get_output(train) l2 = self.layers[1].get_output(train) output = T.batched_tensordot(l1, l2, self.dot_axes) return output elif self.mode == "cos": l1 = self.layers[0].get_output(train) l2 = self.layers[1].get_output(train) output, _ = theano.scan( lambda v1, v2: T.dot(v1, v2) / T.sqrt(T.dot(v1, v1) * T.dot(v2, v2)), sequences=[l1, l2], outputs_info=None, ) return output else: raise Exception("Unknown merge mode")
def step(i, in_mask, ACT, ACT_, in_se, WT):
    sub_tree_idx_ = T.nonzero(WT[:, i, :] > -1)
    a_ = T.dot(in_se[:, i], self.WSM)  # + self.b
    if self.b is not None:
        a_ += self.b.dimshuffle('x', 0)
    a_ = a_ + T.sum(ACT_[:, i], axis=1)
    a_ = T.tanh(a_)
    # if self.dropout:
    #     a_ = a_ / self.retain_prob * self._srng.binomial(
    #         a_.shape, p=self.retain_prob, dtype=theano.config.floatX)
    a_ = T.switch(in_mask, a_, ACT[:, i - 1])
    a__ = T.batched_tensordot(
        a_[sub_tree_idx_[0], :],
        self.WC[WT[sub_tree_idx_[0], i, sub_tree_idx_[1]]],
        axes=1)
    # if self.dropout:
    #     a__ = a__ / self.retain_prob * self._srng.binomial(
    #         a__.shape, p=self.retain_prob, dtype=theano.config.floatX)
    newACT_ = T.set_subtensor(ACT_[sub_tree_idx_[0], sub_tree_idx_[1], i], a__)
    newACT = T.set_subtensor(ACT[:, i], a_)
    return newACT, newACT_
def __init__(self, actpic_variables=None, pics=None, case_labels=None,
             label_count=None, data_stream=None, rectify=False, **kwargs):
    center_val = 0.5
    self.input_pics = pics
    # self.batch_size = batch_size
    # self.label_count = label_count
    self.actpic_variables = actpic_variables
    # attributes pics: (cases, picy, picx) to (cases, labels, picy, picx)
    # attributed_pics = tensor.batched_tensordot(
    #     tensor.extra_ops.to_one_hot(case_labels.flatten(), label_count),
    #     pics[:, 0, :, :], axes=0)
    zeroed_pics = pics - center_val
    attributed_pics = tensor.batched_tensordot(
        tensor.extra_ops.to_one_hot(case_labels.flatten(), label_count),
        zeroed_pics[:, 0, :, :], axes=0)
    self.actpics = [self._create_actpic_image_for(
                        name + '_actpic', var, attributed_pics, rectify)
                    for name, var in self.actpic_variables.items()]
    self.evaluator = DatasetEvaluator(self.actpics)
    self.data_stream = data_stream
    self.results = None
    super(ActpicExtension, self).__init__(**kwargs)
import theano
import theano.tensor as TT
import numpy as np
import time

# define tensor variables: the contexts are a stack of matrices (one per
# batch entry), the targets one vector per batch entry
C = TT.tensor3("A")  # contexts
T = TT.matrix("B")   # targets
# contract the last axis of C with the last axis of T; axis 0 is the batch
batched_tensor_dot = TT.batched_tensordot(C, T, axes=[[2], [1]])
out = theano.function([C, T], batched_tensor_dot)

# test values
c = np.array([[[1, 2, 3], [3, 4, 5]], [[0, 1, 0], [7, 8, 6]]],
             dtype=theano.config.floatX)
t = np.array([[1, 1, 1], [2, 2, 3]], dtype=theano.config.floatX)

start = time.time()
print(out(c, t))
# print(time.time() - start)

# comparison with numpy
print("numpy")
for i in range(t.shape[0]):
    print(np.dot(c[i], t[i]))
def batch_dot(x, y, axes=None):
    if axes is None:
        # behaves like tf.batch_matmul as default
        axes = [(x.ndim - 1,), (y.ndim - 2,)]
    return T.batched_tensordot(x, y, axes=axes)
def unfold_filters(channel_weights, kernel_weights):
    """
    Unfolds bc and b01 weights into proper bc01 weights
    """
    return T.batched_tensordot(channel_weights.dimshuffle(0, 1, 'x'),
                               kernel_weights.dimshuffle(0, 'x', 1, 2),
                               axes=((2), (1)))
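# Quick demo (a sketch, assuming Theano is installed): each batch item's
# bc01 filter is the outer product of its channel weights (c,) and its
# spatial kernel (k0, k1).
import numpy as np
import theano
import theano.tensor as T

cw = T.matrix('cw')   # (b, c)
kw = T.tensor3('kw')  # (b, k0, k1)
out = T.batched_tensordot(cw.dimshuffle(0, 1, 'x'),
                          kw.dimshuffle(0, 'x', 1, 2),
                          axes=((2), (1)))
f = theano.function([cw, kw], out)
cw_v = np.random.rand(2, 3).astype(theano.config.floatX)
kw_v = np.random.rand(2, 5, 5).astype(theano.config.floatX)
print(f(cw_v, kw_v).shape)  # (2, 3, 5, 5)
print(np.allclose(f(cw_v, kw_v)[0], np.einsum('c,ij->cij', cw_v[0], kw_v[0])))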
rf_stack_tnsr = tnsr.tensor3('rf_stack_tnsr')        ## G x stim_size x stim_size
feature_map_tnsr = tnsr.tensor4('feature_map_tnsr')  ## T x D x stim_size x stim_size
apply_rf_to_feature_maps = function(
    inputs=[rf_stack_tnsr, feature_map_tnsr],
    outputs=tnsr.tensordot(rf_stack_tnsr, feature_map_tnsr,
                           axes=[[1, 2], [2, 3]]))

## example python use case
# model_space = apply_rf_to_feature_maps(rf_stack, feature_maps)

##-----prediction menu----- (uses batched_tensordot; not sure why this is
## necessary, but there is a memory error if a normal tensordot is used.)
model_space_tnsr = tnsr.tensor3('X')      ## model-space tensor: G x T x D
feature_weight_tnsr = tnsr.tensor3('NU')  ## feature weight tensor: G x D x V
prediction_menu_tnsr = tnsr.batched_tensordot(
    model_space_tnsr, feature_weight_tnsr,
    axes=[[2], [1]])                      ## prediction tensor: G x T x V
bigmult = function([model_space_tnsr, feature_weight_tnsr],
                   prediction_menu_tnsr)

## example python use case
# prediction_menu = bigmult(model_space, feature_weights)  ## G x T x V

##-----error menu-----
voxel_data_tnsr = tnsr.matrix('voxel_data_tnsr')  ## voxel data tensor: T x V
diff = voxel_data_tnsr - prediction_menu_tnsr     ## difference tensor: (T x V) - (G x T x V) = (G x T x V)
sq_diff = (diff * diff).sum(axis=1)               ## sum-squared-diffs tensor: G x V
sq_diff_func = function(inputs=[voxel_data_tnsr, prediction_menu_tnsr],
                        outputs=sq_diff)

## example python use case
def _match(self, A, B):
    return T.batched_tensordot(A, B, axes=(2, 2))
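# What axes=(2, 2) computes here (a sketch, assuming Theano is installed):
# for A of shape (batch, len_a, d) and B of shape (batch, len_b, d), the
# result is the (batch, len_a, len_b) matrix of dot products between every
# pair of positions, i.e. a batched similarity/match matrix.
import numpy as np
import theano
import theano.tensor as T

A = T.tensor3('A')
B = T.tensor3('B')
match = theano.function([A, B], T.batched_tensordot(A, B, axes=(2, 2)))
a = np.random.rand(2, 4, 8).astype(theano.config.floatX)
b = np.random.rand(2, 6, 8).astype(theano.config.floatX)
print(match(a, b).shape)  # (2, 4, 6)
print(np.allclose(match(a, b), np.einsum('bik,bjk->bij', a, b)))  # True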
def f(i, l1, l2):
    return T.clip(T.batched_tensordot(l1[i], l2[i], 1),
                  FLOAT_MIN, FLOAT_MAX).astype(FLOATX)
def get_output(self, train=False):
    if self.mode == 'sum' or self.mode == 'ave':
        s = self.layers[0].get_output(train)
        for i in range(1, len(self.layers)):
            s += self.layers[i].get_output(train)
        if self.mode == 'ave':
            s /= len(self.layers)
        return s
    elif self.mode == 'index':
        return self.layers[0].get_output(train)[
            self.layers[1].get_output(train)[:, 0]]
    elif self.mode in ('imax', 'iavg', 'imaxavg', 'imaxminavg'):
        if self.mode == 'imax':
            fn = lambda start, end, x: T.max(x[start[0]:end[0]], axis=0)
        elif self.mode == 'iavg':
            fn = lambda start, end, x: T.mean(x[start[0]:end[0]], axis=0)
        elif self.mode == 'imaxavg':
            fn = lambda start, end, x: T.concatenate(
                [T.max(x[start[0]:end[0]], axis=0),
                 T.mean(x[start[0]:end[0]], axis=0)])
        else:
            fn = lambda start, end, x: T.concatenate(
                [T.max(x[start[0]:end[0]], axis=0),
                 T.min(x[start[0]:end[0]], axis=0),
                 T.mean(x[start[0]:end[0]], axis=0)])
        data = self.layers[0].get_output(train)
        starts = self.layers[1].get_output(train)
        ends = self.layers[2].get_output(train)
        outputs, _ = theano.scan(fn=fn, outputs_info=None,
                                 sequences=[starts, ends],
                                 non_sequences=data)
        return outputs
    elif self.mode == 'mm':
        scores = self.layers[0].get_output(train)
        starts = self.layers[1].get_output(train)
        ends = self.layers[2].get_output(train)
        costs = self.layers[3].get_output(train)
        fn = lambda start, end, scs, csts: \
            T.max(csts[start[0]:end[0]] *
                  (3 + scs[start[0]:end[0]] -
                   T.max(scs[start[0]:end[0]][
                       T.eq(csts[start[0]:end[0]], 0).nonzero()[0]])))
        outputs, _ = theano.scan(fn=fn, outputs_info=None,
                                 sequences=[starts, ends],
                                 non_sequences=[scores, costs])
        return outputs.reshape((outputs.size, 1))
    elif self.mode == 'risk':
        scores = self.layers[0].get_output(train)
        starts = self.layers[1].get_output(train)
        ends = self.layers[2].get_output(train)
        costs = self.layers[3].get_output(train)
        # use the scanned non-sequence csts, not the outer variable costs
        fn = lambda start, end, scs, csts: \
            T.sum(csts[start[0]:end[0]] *
                  T.nnet.softmax(scs[start[0]:end[0]].T).T)
        outputs, _ = theano.scan(fn=fn, sequences=[starts, ends],
                                 non_sequences=[scores, costs])
        return outputs.reshape((outputs.size, 1))
    elif self.mode == 'concat':
        inputs = [self.layers[i].get_output(train)
                  for i in range(len(self.layers))]
        return T.concatenate(inputs, axis=self.concat_axis)
    elif self.mode == 'join':
        inputs = OrderedDict()
        for i in range(len(self.layers)):
            X = self.layers[i].get_output(train)
            if X.name is None:
                raise ValueError("merge_mode='join' only works with named inputs")
            else:
                inputs[X.name] = X
        return inputs
    elif self.mode == 'mul':
        s = self.layers[0].get_output(train)
        for i in range(1, len(self.layers)):
            s *= self.layers[i].get_output(train)
        return s
    elif self.mode == 'dot':
        l1 = self.layers[0].get_output(train)
        l2 = self.layers[1].get_output(train)
        output = T.batched_tensordot(l1, l2, self.dot_axes)
        output_shape = list(self.output_shape)
        output_shape[0] = l1.shape[0]
        output = output.reshape(tuple(output_shape))
        return output
    elif self.mode == 'cos':
        l1 = self.layers[0].get_output(train)
        l2 = self.layers[1].get_output(train)
        output, _ = theano.scan(
            lambda v1, v2: T.dot(v1, v2) / T.sqrt(T.dot(v1, v1) * T.dot(v2, v2)),
            sequences=[l1, l2],
            outputs_info=None)
        return output
    else:
        raise Exception('Unknown merge mode')
def call(self, x, mask=None):
    import theano.tensor as T
    # super objects are not callable; invoke the parent layer's call()
    # (mirroring the get_output variant below)
    indices = super(SoftShuffle, self).call(x, mask)
    Y = T.batched_tensordot(indices, x, axes=[(1), (1)])
    return Y
def get_output(self, train=False):
    indices = super(SoftShuffle, self).get_output(train)
    X = self.get_input(train)
    Y = T.batched_tensordot(indices, X, axes=[(1), (1)])
    return Y
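# What the SoftShuffle contraction does (a sketch, assuming Theano is
# installed): with a per-example (soft) permutation matrix P of shape
# (batch, n, n) and inputs X of shape (batch, n, d), axes=[(1), (1)] gives
# Y[b, j] = sum_i P[b, i, j] * X[b, i], i.e. the rows of X are mixed
# according to the columns of P.
import numpy as np
import theano
import theano.tensor as T

P = T.tensor3('P')
X = T.tensor3('X')
shuffle = theano.function([P, X], T.batched_tensordot(P, X, axes=[(1), (1)]))
perm = np.eye(3)[[2, 0, 1]][None].astype(theano.config.floatX)  # one hard permutation
x = np.arange(6, dtype=theano.config.floatX).reshape(1, 3, 2)
print(shuffle(perm, x))  # rows come out re-ordered as x[1], x[2], x[0]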