def context_fn(ind_sent_list): index = ind_sent_list[0] sent_list = ind_sent_list[1] env_matrix = context_fn.env_matrix temp_dir = context_fn.temp_dir # Very slow in SciPy v. 0.7.2. Same for dok_matrix. # mem_matrix = lil_matrix(env_matrix.shape) # Occupies too much memory # mem_matrix = np.zeros(env_matrix.shape, dtype=np.float32) # So, using dictionary as temporary sparse matrix mem_matrix = dict() print 'Training on chunk of sentences', index for sent in sent_list: for i,word in enumerate(sent): if word not in mem_matrix: mem_matrix[word] = np.zeros(env_matrix.shape[1], dtype=np.float32) # Left context for ctxword in sent[:i]: mem_matrix[word] += env_matrix[ctxword,:] # mem_matrix[word,:] += env_matrix[ctxword,:] # Right context for ctxword in sent[i+1:]: mem_matrix[word] += env_matrix[ctxword,:] # mem_matrix[word,:] += env_matrix[ctxword,:] print 'Chunk of sentences', index # print mem_matrix tmp_file =\ os.path.join(temp_dir, 'context-' + str(index) + '.tmp.npy') print 'Dumping to temp file\n'\ ' ', tmp_file dump_matrix(mem_matrix, tmp_file) return tmp_file
def order_fn(ind_sent_list): index = ind_sent_list[0] sent_list = ind_sent_list[1] env_matrix = order_fn.env_matrix lmbda = order_fn.lmbda temp_dir = order_fn.temp_dir left_permutation = order_fn.left_permutation right_permutation = order_fn.right_permutation placeholder = order_fn.placeholder # Very slow in SciPy v. 0.7.2. Same for dok_matrix. # mem_matrix = lil_matrix(env_matrix.shape) # Occupies too much memory # mem_matrix = np.zeros(env_matrix.shape, dtype=np.float32) # So, using dictionary as temporary sparse matrix mem_matrix = dict() print 'Training on chunk of sentences', index for sent in sent_list: for k in xrange(1, lmbda): for i,word in enumerate(sent): a = i - k left = 0 if a < 0 else a b = i + k right = len(sent) if b > len(sent) else b vector_list = ([env_matrix[w] for w in sent[left:i]] + [placeholder] + [env_matrix[w] for w in sent[i+1:right]]) def f(vector_list): if len(vector_list) == 0: return np.zeros(self.dimension) elif len(vector_list) == 1: return vector_list[0] else: v1 = dual.fft(left_permutation(f(vector_list[:-1]))) v2 = dual.fft(right_permutation(vector_list[len(vector_list)-1])) return dual.ifft(v1 * v2) order_vector = f(vector_list) if word not in mem_matrix: mem_matrix[word] = np.zeros(env_matrix.shape[1], dtype=np.float32) mem_matrix[word] += order_vector print 'Chunk of sentences', index, '\n', mem_matrix tmp_file =\ os.path.join(temp_dir, 'order-' + str(index) + '.tmp.npy') print 'Dumping to temp file\n'\ ' ', tmp_file dump_matrix(mem_matrix, tmp_file) return tmp_file
def dump_matrix(self, filename):
    """Write this object's `matrix` attribute to `filename`.

    Delegates to the free function `dump_matrix`; inside a method the
    bare name is looked up outside the class body, so this does not
    call the method itself.  Returns None.
    """
    matrix = self.matrix
    dump_matrix(matrix, filename)