def test_project_pcs():
    x1 = np.random.randn(20000, 2) * np.array([[10., 1.]])
    x2 = np.random.randn(20000, 2) * np.array([[1., 10.]])
    x = np.dstack((x1, x2))
    # Compute the PCs.
    pcs = compute_pcs(x)
    # Project the first sample onto the PCs.
    x_proj = project_pcs(x[0, ...], pcs)
    assert x_proj.shape == (2, 2)
def save_features(experiment, **prm):
    """Compute the features from the waveforms and save them in the
    experiment dataset."""
    nwaveforms_max = prm['pca_nwaveforms_max']
    npcs = prm['nfeatures_per_channel']
    kwik = experiment._files['kwik']
    for chgrp in iterkeys(experiment.channel_groups):
        spikes = experiment.channel_groups[chgrp].spikes
        # Extract a subset of the waveforms.
        nspikes = len(spikes)

        # We convert the extendable features_masks array to a
        # contiguous array.
        if prm.get('features_contiguous', True):
            # Make sure to update the PyTables node after the recreation,
            # to avoid ClosedNodeError.
            spikes.features_masks = to_contiguous(spikes.features_masks,
                                                  nspikes=nspikes)
        else:
            warn(("The features array has not been converted to a contiguous "
                  "array."))

        # Skip the channel group if there are no spikes.
        if nspikes == 0:
            continue
        nwaveforms = min(nspikes, nwaveforms_max)
        step = excerpt_step(nspikes,
                            nexcerpts=nwaveforms,
                            excerpt_size=1)
        waveforms_subset = spikes.waveforms_filtered[::step]

        # With this option, the PCs are directly provided in the PRM file as
        # a NumPy array.
        if prm.get('canonical_pcs', None) is not None:
            pcs = prm['canonical_pcs']
            assert isinstance(pcs, np.ndarray)
        else:
            # We take the masks in order to compute the PCs only on
            # the unmasked spikes, for each channel.
            masks = spikes.features_masks[::step, ::npcs, 1]  # (nspikes, nchannels)
            # Compute the PCs.
            pcs = compute_pcs(waveforms_subset, npcs=npcs, masks=masks)

        # Add the PCs to the KWIK file.
        kwik.createArray(experiment.channel_groups[chgrp]._node,
                         'pca_waveforms', pcs)

        # Project the waveforms on the PCs and compute the features.
        # WARNING: optimization: we could load and project waveforms by chunks.
        for i, waveform in enumerate(spikes.waveforms_filtered):
            # Convert the waveform from int16 to float32 with scaling
            # before projecting it, so as to avoid getting huge numbers.
            waveform = convert_dtype(waveform, np.float32)
            features = project_pcs(waveform, pcs)
            spikes.features_masks[i, :, 0] = features.ravel()
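# A minimal sketch of the chunked projection hinted at by the WARNING above,
# in case loading and projecting one waveform at a time becomes the
# bottleneck. Everything here is an assumption for illustration only: the
# helper name, the chunk size, and the (nspikes, nsamples, nchannels)
# waveform layout with a PC basis of shape (npcs, nsamples, nchannels).
# It is not part of the library's API.
import numpy as np


def project_pcs_by_chunks(waveforms, pcs, chunk_size=1000):
    """Project waveforms onto per-channel PCs, chunk by chunk.

    waveforms : (nspikes, nsamples, nchannels) array
    pcs       : (npcs, nsamples, nchannels) array
    Returns a (nspikes, nchannels, npcs) array of features.
    """
    nspikes = waveforms.shape[0]
    npcs, _, nchannels = pcs.shape
    out = np.empty((nspikes, nchannels, npcs), dtype=np.float32)
    for start in range(0, nspikes, chunk_size):
        chunk = waveforms[start:start + chunk_size].astype(np.float32)
        # For each spike i and channel c, out[i, c, p] is the sum over
        # samples s of chunk[i, s, c] * pcs[p, s, c]: one dot product
        # per channel, computed for the whole chunk at once.
        out[start:start + chunk_size] = np.einsum('isc,psc->icp', chunk, pcs)
    return out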
def test_compute_pcs_3d():
    """Test PCA on a 3D array."""
    x1 = np.random.randn(20000, 2) * np.array([[10., 1.]])
    x2 = np.random.randn(20000, 2) * np.array([[1., 10.]])
    x = np.dstack((x1, x2))
    # Compute the PCs.
    pcs = compute_pcs(x)
    assert pcs.ndim == 3
    assert np.linalg.norm(pcs[0, :, 0] - np.array([-1., 0.])) < 1e-2
    assert np.linalg.norm(pcs[1, :, 0] - np.array([0., -1.])) < 1e-2
    assert np.linalg.norm(pcs[0, :, 1] - np.array([0., 1.])) < 1e-2
    assert np.linalg.norm(pcs[1, :, 1] - np.array([-1., 0.])) < 1e-2
def test_compute_pcs():
    """Test PCA on a 2D array."""
    # Horizontal ellipsoid.
    x = np.random.randn(20000, 2) * np.array([[10., 1.]])
    # Rotate the points by pi/4.
    a = 1. / np.sqrt(2.)
    rot = np.array([[a, -a], [a, a]])
    x = np.dot(x, rot)
    # Compute the PCs.
    pcs = compute_pcs(x)
    assert pcs.ndim == 2
    assert (np.abs(pcs) - a).max() < 1e-2
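# The tests above exercise compute_pcs() and project_pcs() without showing
# them. As a rough illustration of the underlying idea only, here is a
# minimal PCA sketch for the 2D case, assuming the PCs are returned one per
# row and sorted by decreasing variance; the real compute_pcs() additionally
# handles 3D arrays, masks, and an npcs argument.
import numpy as np


def compute_pcs_sketch(x, npcs=None):
    """Principal directions of a 2D array x of shape (nspikes, nsamples)."""
    x = x - x.mean(axis=0)
    cov = np.cov(x, rowvar=False)        # (nsamples, nsamples) covariance
    evals, evecs = np.linalg.eigh(cov)   # eigenvalues in ascending order
    order = np.argsort(evals)[::-1]      # largest variance first
    pcs = evecs[:, order].T              # one principal direction per row
    return pcs if npcs is None else pcs[:npcs]

# For the rotated ellipsoid in test_compute_pcs(), both principal directions
# have components of magnitude ~1/sqrt(2), which is what the final assertion
# checks (PCA leaves the sign of each direction arbitrary).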