def build_H_matrix(X, kohonen):
    """Build the hits matrix of the map for the sample matrix X.

    Every row of ``X`` is matched against the normalized ``kohonen`` weights
    with ``c_get_bmu`` and the counter of the winning node is incremented.

    Parameters
    ----------
    X : CSR matrix of shape = [n_samples, n_features]
        Sparse representation of the samples in the INPUT space. Must have
        the same n_features dimension as the data used for fitting the map.
    kohonen : array
        Weights of the map; its first axis indexes the nodes. It is passed
        through ``Normalizer().fit_transform`` before the matching.

    Returns
    -------
    H_matrix : ndarray of shape = [n_nodes]
        Integers giving for each node the number of best matching documents.
    """
    # NOTE(review): build_H_matrix is defined twice in this file; the later
    # definition is the one that ends up bound to the name.
    debug("Build_H_matrix", "Starting the counting of hits...")
    debug("Build_H_matrix", "Using %d documents with %d features" % X.shape)

    n_nodes = kohonen.shape[0]
    # ``np.int`` was a plain alias of the builtin ``int`` and no longer exists
    # in current NumPy releases; the builtin yields the same dtype.
    H_matrix = np.zeros(n_nodes, dtype=int)
    KN = Normalizer().fit_transform(kohonen)

    # Count one hit on the best matching unit of every sample.
    n_samples = X.shape[0]
    for i in range(n_samples):
        bmu_idx = c_get_bmu(KN, X.getrow(i))[0]
        H_matrix[bmu_idx] += 1
    return H_matrix
def build_H_matrix(X, kohonen):
    """Build the hits matrix of the map for the sample matrix X.

    Every row of ``X`` is matched against the normalized ``kohonen`` weights
    with ``c_get_bmu`` and the counter of the winning node is incremented.

    Parameters
    ----------
    X : CSR matrix of shape = [n_samples, n_features]
        Sparse representation of the samples in the INPUT space. Must have
        the same n_features dimension as the data used for fitting the map.
    kohonen : array
        Weights of the map; its first axis indexes the nodes. It is passed
        through ``Normalizer().fit_transform`` before the matching.

    Returns
    -------
    H_matrix : ndarray of shape = [n_nodes]
        Integers giving for each node the number of best matching documents.
    """
    # NOTE(review): an earlier definition of build_H_matrix exists in this
    # file; this later one is the definition that stays bound to the name.
    debug("Build_H_matrix", "Starting the counting of hits...")
    debug("Build_H_matrix", "Using %d documents with %d features" % X.shape)

    n_nodes = kohonen.shape[0]
    # ``np.int`` was a plain alias of the builtin ``int`` and no longer exists
    # in current NumPy releases; the builtin yields the same dtype.
    H_matrix = np.zeros(n_nodes, dtype=int)
    KN = Normalizer().fit_transform(kohonen)

    # Count one hit on the best matching unit of every sample.
    n_samples = X.shape[0]
    for i in range(n_samples):
        bmu_idx = c_get_bmu(KN, X.getrow(i))[0]
        H_matrix[bmu_idx] += 1
    return H_matrix
def batch_unit_deltas(Kohonen, kernel, X, normalized_X, kshape, Xshape, topology="rect"):
    """Accumulate the batch weight updates of one epoch (training cycle).

    For each sample the best matching unit is looked up on the normalized
    map, the kernel is unfolded around it, and the resulting per-node update
    is added to a running sparse sum.

    Return:
    -------
    unit_deltas: csr matrix of shape = [n_nodes, n_features]
        Sum over all samples of the per-sample node updates (batch).

    quantization_error: float
        Mean over the samples of the error value returned by the best
        matching unit lookup.
    """
    sample_count, feature_count = Xshape
    node_count = kshape[0] * kshape[1]
    delta_shape = (node_count, feature_count)

    # Normalized copy of the map used for matching, then the accumulators
    unit_norm_map = Normalizer().fit_transform(Kohonen)
    accumulated = sparse.csr_matrix(delta_shape, dtype=np.double)
    per_sample_error = np.empty(sample_count, dtype=np.double)

    for row in np.arange(sample_count):
        sample = normalized_X.getrow(row)

        # Winning node for this sample together with its matching error
        winner, error = c_get_bmu(unit_norm_map, sample)
        per_sample_error[row] = error
        grid_position = _node_to_coordinate(winner, kshape, topology=topology)

        # Influence of the winner on every node, as a flat vector
        influence = _unfold_kernel(grid_position, kernel, kshape).flatten()

        # Per-node update for this sample; the helper is expected to yield
        # influence[j] * (X[row] - Kohonen[j]) for each node j
        sample_delta = c_single_unit_deltas(X, Kohonen, row, influence)
        accumulated = accumulated + sparse.csr_matrix(sample_delta, delta_shape)

    # The epoch error is the average of the per-sample matching errors
    return accumulated, per_sample_error.mean()
def predict(self, X):
    """Predict the position of the samples on the trained Kohonen map.

    Only a simple check that the map has been fitted (presence of the
    ``K_`` attribute) is performed.

    Parameters:
    -----------
    X: matrix
        CSR matrix of shape = [n_samples, n_features]

    Return:
    -------
    Y: array
        Shape = [n_samples]
        Index of the cluster each sample belongs to, or None (after a debug
        message) when the network has not been trained yet.
    """
    if not hasattr(self, "K_"):
        debug("predict", "The Kohonen network must be trained before use")
        return None

    KN = Normalizer().fit_transform(self.K_)
    # ``range`` instead of ``xrange`` so the method also runs on Python 3;
    # the first item returned by c_get_bmu is the index of the winning node.
    return np.array([c_get_bmu(KN, X.getrow(i))[0]
                     for i in range(X.shape[0])])