def _embed_word_indices(self, indices, embeddings):
    """Embed all indices using the given embeddings.

    Parameters
    ----------
    indices : ndarray
        indices of the items to embed using the given embeddings matrix
        Note: indices are flattened
    embeddings : ndarray (vocab_size x emb_size)
        embeddings matrix

    Returns
    -------
    ndarray (len(indices) x emb_size)
        embedded indices
    """
    concatenated_input = indices.flatten()
    # Rami's fix
    if theano.config.device == 'gpu':
        embedded = embeddings[concatenated_input]
    else:
        embedded = theano.sparse_grad(embeddings[concatenated_input])
    return embedded
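# A minimal, self-contained sketch (not from the original source) of the same
# lookup that _embed_word_indices performs: indexing a shared embedding matrix
# with a flattened index vector. The shapes and names below are illustrative
# assumptions only.
import numpy as np
import theano
import theano.tensor as T

vocab_size, emb_size = 100, 16
embeddings = theano.shared(
    np.random.randn(vocab_size, emb_size).astype(theano.config.floatX),
    name='embeddings')

indices = T.ivector('indices')        # already-flattened word indices
embedded = embeddings[indices]        # one embedding row per index
lookup = theano.function([indices], embedded)

print(lookup(np.array([3, 7, 3], dtype='int32')).shape)   # (3, emb_size)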
def link(self, inputs):
    """
    The input should be a matrix whose rows represent examples. We need
    embeddings for all indices in the matrix, which is why we flatten the
    matrix first, look up the embeddings for all indices, and then reshape
    the result back to one row per example.

    Parameters
    ----------
    inputs
        0) indices of the examples to get the embeddings of

    Returns
    -------
    []
        0) embeddings of the input
    """
    self.inputs = inputs
    input = self.inputs[0]
    concatenated_input = input.flatten()
    # Rami's fix
    if theano.config.device == 'gpu':
        indexed_rows = self.weights[concatenated_input]
    else:
        indexed_rows = theano.sparse_grad(self.weights[concatenated_input])
    concatenated_rows = indexed_rows.flatten()
    num_examples = input.shape[0]
    width = concatenated_rows.size // num_examples
    self.outputs = [concatenated_rows.reshape((num_examples, width))]
    return self.outputs
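# Hypothetical sketch of the flatten -> lookup -> reshape pattern that link()
# implements, assuming a (num_examples x num_words) index matrix; the variable
# names and shapes here are assumptions, not part of the original code.
import numpy as np
import theano
import theano.tensor as T

vocab_size, emb_size = 50, 8
weights = theano.shared(
    np.random.randn(vocab_size, emb_size).astype(theano.config.floatX))

idx = T.imatrix('idx')                      # rows are examples
flat = idx.flatten()                        # all indices in one vector
rows = weights[flat]                        # (num_examples * num_words, emb_size)
out = rows.flatten().reshape((idx.shape[0], idx.shape[1] * emb_size))
f = theano.function([idx], out)

batch = np.array([[1, 2, 3], [4, 5, 6]], dtype='int32')
print(f(batch).shape)                       # (2, 24): one concatenated row per example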
def __init__(self, input, w_values, embedding_dim, mini_batch_size, width,
             name="EmbeddingLayer", params=None):
    """Build the embedding lookup: flatten the (mini_batch_size x width) index
    matrix `input`, index the embedding matrix, and reshape the result to
    (mini_batch_size, width * embedding_dim)."""
    self.n_in = 1.0
    self.w_values = w_values
    super(EmbeddingLayer, self).__init__(name, params)
    concatenated_input = input.flatten()
    if theano.config.device == 'cpu':
        indexed_rows = theano.sparse_grad(self.weights[concatenated_input])
    else:
        indexed_rows = self.weights[concatenated_input]
    concatenated_rows = indexed_rows.flatten()
    # mini_batch_size = input.shape[0]
    # width = input.shape[1]
    self.output = concatenated_rows.reshape((mini_batch_size, width * embedding_dim))
    self.params = [self.weights]
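# Hypothetical usage of EmbeddingLayer. It assumes the base Layer class turns
# w_values into the shared variable self.weights; the vocabulary size, batch
# size, and width below are illustrative assumptions, not values from the
# original code.
import numpy as np
import theano.tensor as T

vocab_size, embedding_dim = 1000, 50
mini_batch_size, width = 32, 5            # 5 word indices per example

w_values = np.random.uniform(-0.05, 0.05,
                             (vocab_size, embedding_dim)).astype('float32')
x = T.imatrix('x')                        # (mini_batch_size x width) index matrix

layer = EmbeddingLayer(x, w_values, embedding_dim, mini_batch_size, width)
# layer.output is a symbolic (mini_batch_size x width * embedding_dim) matrix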