def predict(self, y, return_quadratic_form=False):
    """Compute class affiliations for the complex observation ``y``.

    Args:
        y: Complex observations, shape (..., N, D).
        return_quadratic_form: When True, additionally return the
            quadratic form computed during prediction.

    Returns:
        The affiliations, optionally together with the quadratic form.
    """
    assert np.iscomplexobj(y), y.dtype
    observation = normalize_observation(y)  # swap D and N dim
    affiliation, quadratic_form, _ = self._predict(observation)
    if return_quadratic_form:
        return affiliation, quadratic_form
    return affiliation
def update_qD(self, logspec0, spatial_feats):
    '''Updates q(D) approximate posterior.

    Follows Eq. (19) from [1]. Additionally permutes the spatial
    components to best fit the spectral components at each frequency.

    [1] Integration of variational autoencoder and spatial clustering
        for adaptive multi-channel neural speech separation;
        K. Zmolikova, M. Delcroix, L. Burget, T. Nakatani, J. Cernocky

    Args:
        logspec0 (torch.Tensor): Spectral features of shape (T,F)
        spatial_feats (np.array): Spatial features of shape (C,T,F)
    '''
    _, n_t, n_f = spatial_feats.shape
    # Spatial model expects (F, T, C); normalize_observation swaps dims.
    spatial_norm = normalize_observation(spatial_feats.transpose(2, 1, 0))
    # Per-class spatial log-pdf; class axis inserted via None.
    spat_log_p, _ = self.spatial.cacg._log_pdf(spatial_norm[:, None, :, :])
    # Rearrange to (class, T, F) so it lines up with the spectral term.
    spat_log_p = spat_log_p.transpose(1, 2, 0)
    spectral_log_p = self._spectral_log_p(logspec0)
    # Find, per frequency (idx_constant=1), the permutation of spatial
    # components that best matches the spectral components.
    perm = self._find_best_permutation(spectral_log_p.detach().numpy(),
                                       spat_log_p, idx_constant=1)
    # Apply the per-frequency permutation in place. The advanced-indexed
    # RHS is a copy, so the assignment does not alias the slice it writes.
    for f in range(n_f):
        spat_log_p[..., f] = spat_log_p[perm[0, f], :, f]
    ln_qds_unnorm = []
    for i in range(self.n_classes):
        # Combine spatial (numpy -> torch) and spectral log-likelihoods.
        qd1_unnorm = torch.tensor(spat_log_p[i]).to(self.device).float()
        qd1_unnorm += spectral_log_p[i]
        ln_qds_unnorm.append(qd1_unnorm)
    ln_qds_unnorm = torch.stack(ln_qds_unnorm)
    # subtract max for stability of exp (the constant does not matter)
    ln_qds_unnorm = (ln_qds_unnorm -
                     ln_qds_unnorm.max(dim=0, keepdim=True)[0])
    qds_unnorm = ln_qds_unnorm.exp()
    # Normalize over classes; epsilon guards against division by zero.
    qd = qds_unnorm / (qds_unnorm.sum(axis=0) + 1e-6)
    # Clamp away exact 0/1 to keep later log/entropy terms finite.
    qd = qd.clamp(1e-6, 1 - 1e-6)
    # Stored posterior must not carry gradients.
    self.qd = qd.detach()
def log_likelihood(self, y):
    """Return the scalar log-likelihood of the observations ``y``.

    >>> import paderbox as pb
    >>> F, T, D, K = 513, 400, 6, 3
    >>> y = pb.utils.random_utils.normal([F, T, D], dtype=np.complex128)
    >>> mm = CACGMMTrainer().fit(y, num_classes=K, iterations=2)
    >>> log_likelihood1 = mm.log_likelihood(y)
    >>> mm = CACGMMTrainer().fit(y, initialization=mm, iterations=1)
    >>> log_likelihood2 = mm.log_likelihood(y)
    >>> assert log_likelihood2 > log_likelihood1, (log_likelihood1, log_likelihood2)
    >>> np.isscalar(log_likelihood1), log_likelihood1.dtype
    (True, dtype('float64'))
    """
    assert np.iscomplexobj(y), y.dtype
    normalized = normalize_observation(y)  # swap D and N dim
    _, _, log_pdf = self._predict(normalized)
    return self._log_likelihood(normalized, log_pdf)
def update_spatial(self, spatial_feats):
    '''Re-estimates the spatial model parameters via an M-step.

    Args:
        spatial_feats (np.array): Spatial features of shape (C,T,F)
    '''
    # Bring the features to (F, T, C) and normalize the observations.
    feats_ftc = spatial_feats.transpose(2, 1, 0)
    observation = normalize_observation(feats_ftc)
    # The quadratic form from the current spatial model is reused here.
    _, quadratic_form = self.spatial.predict(
        feats_ftc, return_quadratic_form=True)
    # q(D) takes the role of the affiliations in the M-step.
    affiliation = self.qd.permute(2, 0, 1).detach().cpu().numpy()
    self.spatial = self.cacGMM._m_step(
        observation,
        quadratic_form,
        affiliation,
        saliency=None,
        hermitize=True,
        covariance_norm='eigenvalue',
        eigenvalue_floor=1e-10,
        weight_constant_axis=self.wca,
    )
def permute_global(self, logspec0, spatial_feats):
    '''Reorders the classes in q(D) to align spectral and spatial models.

    The goal is to align components (corresponding to speakers and noise)
    between the spectral and the spatial model according to their
    likelihoods. The order of the spectral model is kept fixed and a
    single global permutation is applied instead. Because this function
    is called after initializing the spatial models and before updating
    q(Z) and q(D), permuting q(D) is essentially the same as changing
    the order in the spatial model.

    Args:
        logspec0 (torch.Tensor): Spectral features of shape (T,F)
        spatial_feats (np.array): Spatial features of shape (C,T,F)
    '''
    observation = normalize_observation(spatial_feats.transpose(2, 1, 0))
    log_p_spatial, _ = self.spatial.cacg._log_pdf(
        observation[:, None, :, :])
    # Rearrange to (class, T, F) to match the spectral log-likelihoods.
    log_p_spatial = log_p_spatial.transpose(1, 2, 0)
    log_p_spectral = self._spectral_log_p(logspec0)
    # idx_constant=(1, 2): one permutation shared over time and frequency.
    permutation = self._find_best_permutation(
        log_p_spectral.detach().numpy(), log_p_spatial, idx_constant=(1, 2))
    self.qd = self.qd[permutation[0, 0]]
def fit(
        self,
        y,
        initialization=None,
        num_classes=None,
        iterations=100,
        *,
        saliency=None,
        source_activity_mask=None,
        weight_constant_axis=(-1,),
        dirichlet_prior_concentration=1,
        hermitize=True,
        covariance_norm='eigenvalue',
        affiliation_eps=1e-10,
        eigenvalue_floor=1e-10,
        return_affiliation=False,
):
    """Fit the mixture model with EM iterations.

    Args:
        y: Shape (..., N, D)
        initialization: Affiliations between 0 and 1. Shape (..., K, N)
            or CACGMM instance
        num_classes: Scalar >0
        iterations: Scalar >0
        saliency: Importance weighting for each observation, shape (..., N)
            ToDo: Discuss: allow str e.g. 'norm' as
            `saliency = np.linalg.norm(y)`
        source_activity_mask: Boolean mask that says for each time point
            for each source if it is active or not. Shape (..., K, N)
        weight_constant_axis: The axis that is used to calculate the mean
            over the affiliations. The affiliations have the shape
            (..., K, N), so the default value means averaging over the
            sample dimension. Note an averaging over independent axis is
            supported. Averaging over -2 is identical to
            dirichlet_prior_concentration == np.inf.
        dirichlet_prior_concentration: Prior for the mixture weight
        hermitize:
        covariance_norm: 'eigenvalue', 'trace' or False
        affiliation_eps: Lower bound used in the E-step prediction.
        eigenvalue_floor: Relative flooring of the covariance eigenvalues
        return_affiliation: If True, return (model, affiliation).

    Returns:
        The fitted model, optionally with the final affiliations.
    """
    assert xor(initialization is None, num_classes is None), (
        "Incompatible input combination. "
        "Exactly one of the two inputs has to be None: "
        f"{initialization is None} xor {num_classes is None}"
    )
    assert np.iscomplexobj(y), y.dtype
    assert y.shape[-1] > 1, y.shape
    y = normalize_observation(y)  # swap D and N dim
    assert iterations > 0, iterations

    model = None
    *independent, D, num_observations = y.shape

    if initialization is None:
        assert num_classes is not None, num_classes
        affiliation_shape = (*independent, num_classes, num_observations)
        # Random start, normalized so affiliations sum to one per sample.
        affiliation = np.random.uniform(size=affiliation_shape)
        affiliation /= np.einsum("...kn->...n", affiliation)[..., None, :]
        quadratic_form = np.ones(affiliation_shape, dtype=y.real.dtype)
    elif isinstance(initialization, np.ndarray):
        num_classes = initialization.shape[-2]
        assert num_classes > 1, num_classes
        affiliation_shape = (*independent, num_classes, num_observations)

        # Force same number of dims (Prevent wrong input)
        assert initialization.ndim == len(affiliation_shape), (
            initialization.shape, affiliation_shape)

        # Allow singleton dimensions to be broadcasted
        assert initialization.shape[-2:] == affiliation_shape[-2:], (
            initialization.shape, affiliation_shape)

        affiliation = np.broadcast_to(initialization, affiliation_shape)
        quadratic_form = np.ones(affiliation_shape, dtype=y.real.dtype)
    elif isinstance(initialization, CACGMM):
        num_classes = initialization.weight.shape[-2]
        model = initialization
    else:
        raise TypeError('No sufficient initialization.')

    if isinstance(weight_constant_axis, list):
        # List does not work in numpy 1.16.0 as axis
        weight_constant_axis = tuple(weight_constant_axis)

    if source_activity_mask is not None:
        # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
        # correct way to refer to the boolean dtype.
        assert source_activity_mask.dtype == bool, source_activity_mask.dtype
        assert source_activity_mask.shape[-2:] == (
            num_classes, num_observations), (
            source_activity_mask.shape, independent, num_classes,
            num_observations)

        if isinstance(initialization, np.ndarray):
            assert source_activity_mask.shape == initialization.shape, (
                source_activity_mask.shape, initialization.shape)

    # Sanity guards against transposed/mis-shaped input.
    assert num_classes < 20, f'num_classes: {num_classes}, sure?'
    assert D < 35, f'Channels: {D}, sure?'

    for iteration in range(iterations):
        if model is not None:
            # E-step: recompute affiliations from the current model.
            affiliation, quadratic_form, _ = model._predict(
                y,
                source_activity_mask=source_activity_mask,
                affiliation_eps=affiliation_eps,
            )

        # M-step: re-estimate model parameters from the affiliations.
        model = self._m_step(
            y,
            quadratic_form,
            affiliation=affiliation,
            saliency=saliency,
            dirichlet_prior_concentration=dirichlet_prior_concentration,
            hermitize=hermitize,
            covariance_norm=covariance_norm,
            eigenvalue_floor=eigenvalue_floor,
            weight_constant_axis=weight_constant_axis,
        )

    if return_affiliation is True:
        return model, affiliation
    elif return_affiliation is False:
        return model
    else:
        raise ValueError(return_affiliation)
def fit(
        self,
        y,
        initialization=None,
        num_classes=None,
        iterations=100,
        *,
        saliency=None,
        source_activity_mask=None,
        weight_constant_axis=(-1,),
        hermitize=True,
        covariance_norm='eigenvalue',
        affiliation_eps=1e-10,
        eigenvalue_floor=1e-10,
        inline_permutation_aligner: _PermutationAlignment = None,
):
    """Fit the mixture model with EM iterations.

    Args:
        y: Shape (..., N, D)
        initialization: Affiliations between 0 and 1. Shape (..., K, N)
            or CACGMM instance
        num_classes: Scalar >0
        iterations: Scalar >0
        saliency: Importance weighting for each observation, shape (..., N)
            Should be pre-calculated externally, not just a string.
        source_activity_mask: Boolean mask that says for each time point
            for each source if it is active or not. Shape (..., K, N)
        weight_constant_axis: The axis that is used to calculate the mean
            over the affiliations. The affiliations have the shape
            (..., K, N), so the default value means averaging over the
            sample dimension. Note that averaging over an independent
            axis is supported.
        hermitize:
        covariance_norm: 'eigenvalue', 'trace' or False
        affiliation_eps:
        eigenvalue_floor: Relative flooring of the covariance eigenvalues
        inline_permutation_aligner: In rare cases you may want to run a
            permutation alignment solver after each E-step. You can
            instantiate a permutation alignment solver outside of the
            fit function and pass it to this function.

    Returns:
        The fitted model.
    """
    assert xor(initialization is None, num_classes is None), (
        "Incompatible input combination. "
        "Exactly one of the two inputs has to be None: "
        f"{initialization is None} xor {num_classes is None}"
    )
    assert np.iscomplexobj(y), y.dtype
    assert y.shape[-1] > 1, y.shape
    y = normalize_observation(y)  # swap D and N dim
    assert iterations > 0, iterations

    model = None
    *independent, D, num_observations = y.shape

    if initialization is None:
        assert num_classes is not None, num_classes
        affiliation_shape = (*independent, num_classes, num_observations)
        # Random start, normalized so affiliations sum to one per sample.
        affiliation = np.random.uniform(size=affiliation_shape)
        affiliation /= np.einsum("...kn->...n", affiliation)[..., None, :]
        quadratic_form = np.ones(affiliation_shape, dtype=y.real.dtype)
    elif isinstance(initialization, np.ndarray):
        num_classes = initialization.shape[-2]
        assert num_classes > 1, num_classes
        affiliation_shape = (*independent, num_classes, num_observations)

        # Force same number of dims (Prevent wrong input)
        assert initialization.ndim == len(affiliation_shape), (
            initialization.shape, affiliation_shape)

        # Allow singleton dimensions to be broadcasted
        assert initialization.shape[-2:] == affiliation_shape[-2:], (
            initialization.shape, affiliation_shape)

        affiliation = np.broadcast_to(
            initialization.astype(dtype=y.real.dtype), affiliation_shape)
        quadratic_form = np.ones(affiliation_shape, dtype=y.real.dtype)
    elif isinstance(initialization, CACGMM):
        # weight[-2] may be 1, when weight is fixed to 1/K
        # num_classes = initialization.weight.shape[-2]
        num_classes = initialization.cacg.covariance_eigenvectors.shape[-3]
        model = initialization
    else:
        raise TypeError('No sufficient initialization.')

    if isinstance(weight_constant_axis, list):
        # List does not work in numpy 1.16.0 as axis
        weight_constant_axis = tuple(weight_constant_axis)

    if source_activity_mask is not None:
        # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
        # correct way to refer to the boolean dtype.
        assert source_activity_mask.dtype == bool, source_activity_mask.dtype  # noqa
        assert source_activity_mask.shape[-2:] == (
            num_classes, num_observations), (
            source_activity_mask.shape, independent, num_classes,
            num_observations)  # noqa

        if isinstance(initialization, np.ndarray):
            assert source_activity_mask.shape == initialization.shape, (
                source_activity_mask.shape, initialization.shape)  # noqa

    # Sanity guards against transposed/mis-shaped input.
    assert num_classes < 20, f'num_classes: {num_classes}, sure?'
    assert D < 35, f'Channels: {D}, sure?'

    for iteration in range(iterations):
        if model is not None:
            # E-step: recompute affiliations from the current model.
            affiliation, quadratic_form, _ = model._predict(
                y,
                source_activity_mask=source_activity_mask,
                affiliation_eps=affiliation_eps,
            )

            if inline_permutation_aligner is not None:
                affiliation, quadratic_form \
                    = apply_inline_permutation_alignment(
                        affiliation=affiliation,
                        quadratic_form=quadratic_form,
                        weight_constant_axis=weight_constant_axis,
                        aligner=inline_permutation_aligner,
                    )

        # M-step: re-estimate model parameters from the affiliations.
        model = self._m_step(
            y,
            quadratic_form,
            affiliation=affiliation,
            saliency=saliency,
            hermitize=hermitize,
            covariance_norm=covariance_norm,
            eigenvalue_floor=eigenvalue_floor,
            weight_constant_axis=weight_constant_axis,
        )

    return model
def fit(self, y, initialization=None, num_classes=None, iterations=100, *,
        saliency=None, source_activity_mask=None,
        dirichlet_prior_concentration=1, hermitize=True,
        covariance_norm='eigenvalue', affiliation_eps=1e-10,
        eigenvalue_floor=1e-10, return_affiliation=False):
    """Fit the mixture model with EM iterations.

    Args:
        y: Shape (..., N, D)
        initialization: Affiliations between 0 and 1. Shape (..., K, N)
            or CACGMM instance
        num_classes: Scalar >0
        iterations: Scalar >0
        saliency: Importance weighting for each observation, shape (..., N)
            ToDo: Discuss: allow str e.g. 'norm' as
            `saliency = np.linalg.norm(y)`
        source_activity_mask: Boolean mask that says for each time point
            for each source if it is active or not. Shape (..., K, N)
        dirichlet_prior_concentration: Prior for the mixture weight
        hermitize:
        covariance_norm: 'eigenvalue', 'trace' or False
        eigenvalue_floor: Relative flooring of the covariance eigenvalues
        return_affiliation: If True, return (model, affiliation).

    Returns:
        The fitted model, optionally with the final affiliations.
    """
    assert xor(initialization is None, num_classes is None), (
        "Incompatible input combination. "
        "Exactly one of the two inputs has to be None: "
        f"{initialization is None} xor {num_classes is None}"
    )
    assert np.iscomplexobj(y), y.dtype
    assert y.shape[-1] > 1
    y = normalize_observation(y)  # swap D and N dim
    assert iterations > 0, iterations

    model = None
    *independent, D, num_observations = y.shape

    if initialization is None:
        assert num_classes is not None, num_classes
        affiliation_shape = (*independent, num_classes, num_observations)
        # Random start, normalized so affiliations sum to one per sample.
        affiliation = np.random.uniform(size=affiliation_shape)
        affiliation /= np.einsum("...kn->...n", affiliation)[..., None, :]
        quadratic_form = np.ones(affiliation_shape, dtype=y.real.dtype)
    elif isinstance(initialization, np.ndarray):
        num_classes = initialization.shape[-2]
        affiliation_shape = (*independent, num_classes, num_observations)
        assert initialization.shape == affiliation_shape, (
            initialization.shape, affiliation_shape)
        affiliation = initialization
        quadratic_form = np.ones(affiliation_shape, dtype=y.real.dtype)
    elif isinstance(initialization, CACGMM):
        num_classes = initialization.weight.shape[-1]
        model = initialization
    else:
        raise TypeError('No sufficient initialization.')

    if source_activity_mask is not None:
        # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
        # correct way to refer to the boolean dtype.
        assert source_activity_mask.dtype == bool, source_activity_mask.dtype
        assert source_activity_mask.shape[-2:] == (
            num_classes, num_observations), (
            source_activity_mask.shape, independent, num_classes,
            num_observations)

        if isinstance(initialization, np.ndarray):
            assert source_activity_mask.shape == initialization.shape, (
                source_activity_mask.shape, initialization.shape)

    # Sanity guards against transposed/mis-shaped input.
    assert num_classes < 20, f'num_classes: {num_classes}, sure?'
    assert D < 30, f'Channels: {D}, sure?'

    for iteration in range(iterations):
        if model is not None:
            # E-step: recompute affiliations from the current model.
            affiliation, quadratic_form = model._predict(
                y,
                source_activity_mask=source_activity_mask,
                affiliation_eps=affiliation_eps,
            )

        # M-step: re-estimate model parameters from the affiliations.
        model = self._m_step(
            y,
            quadratic_form,
            affiliation=affiliation,
            saliency=saliency,
            dirichlet_prior_concentration=dirichlet_prior_concentration,
            hermitize=hermitize,
            covariance_norm=covariance_norm,
            eigenvalue_floor=eigenvalue_floor,
        )

    if return_affiliation is True:
        return model, affiliation
    elif return_affiliation is False:
        return model
    else:
        raise ValueError(return_affiliation)
def predict(self, y):
    """Compute class affiliations for the complex observation ``y``.

    Args:
        y: Complex observations, shape (..., N, D).

    Returns:
        The affiliations.
    """
    assert np.iscomplexobj(y), y.dtype
    observation = normalize_observation(y)  # swap D and N dim
    affiliation, _ = self._predict(observation)
    return affiliation