def test_transition_bias_unique_speaker(self):
    """Check the estimate when the only sequence has a single speaker.

    A sequence made of one repeated cluster id contains no speaker changes.
    The estimated bias and its complement must both have a logarithm that
    is not -inf.
    """
    single_speaker_ids = [[1] * 100]
    bias, _ = utils.estimate_transition_bias(cluster_ids=single_speaker_ids)

    log_of_bias = np.log(bias)
    self.assertTrue(log_of_bias != -np.inf)

    log_of_complement = np.log(1 - bias)
    self.assertTrue(log_of_complement != -np.inf)
def test_transition_bias_always_changing_speaker(self):
    """Check the estimate when the speaker changes at every step.

    Every consecutive pair of cluster ids in the input sequences differs.
    The estimated bias and its complement must both have a logarithm that
    is not -inf.
    """
    alternating_ids = [[1, 2, 1], [2, 1, 2]]
    bias, _ = utils.estimate_transition_bias(cluster_ids=alternating_ids)

    log_of_bias = np.log(bias)
    self.assertTrue(log_of_bias != -np.inf)

    log_of_complement = np.log(1 - bias)
    self.assertTrue(log_of_complement != -np.inf)
def test_transition_bias_empty_sequences(self):
    """Check the estimate when every input sequence is empty.

    The estimated bias and its complement must both have a logarithm that
    is not -inf, and the returned denominator must be non-zero.
    """
    empty_ids = [[], [], []]
    bias, weight = utils.estimate_transition_bias(cluster_ids=empty_ids)

    log_of_bias = np.log(bias)
    self.assertTrue(log_of_bias != -np.inf)

    log_of_complement = np.log(1 - bias)
    self.assertTrue(log_of_complement != -np.inf)

    self.assertTrue(weight != 0)
def fit(self, train_sequences, train_cluster_ids, args):
    """Train the UISRNN model on labelled sequences.

    The input is normalised to a list of sequences, the transition bias is
    optionally estimated (and merged with any earlier estimate), and the
    data is then concatenated and handed to `fit_concatenated()`.

    Args:
      train_sequences: The training observations, in one of two forms:

        1. A list whose elements are 2-dim numpy arrays of real numbers,
           each of size `length * D`. `length` may differ between
           sequences, `D` may not. In speaker diarization, each sequence
           holds the speaker embeddings of one utterance.
        2. A single, already concatenated 2-dim numpy array of real
           numbers. See `fit_concatenated()` for more details.
      train_cluster_ids: Ground truth labels for train_sequences:

        1. If train_sequences is a list, a list of the same size whose
           elements are 1-dim lists or numpy arrays of strings.
        2. If train_sequences is a single concatenated sequence, the
           matching concatenated 1-dim list or numpy array of strings.
      args: Training configurations. See `arguments.py` for details.

    Raises:
      TypeError: If train_sequences is neither a list nor a numpy.ndarray.
        Validation of train_cluster_ids is not performed in this method;
        presumably the helpers it calls do that -- TODO confirm.
    """
    # Reject unsupported containers before doing any work.
    if not isinstance(train_sequences, (np.ndarray, list)):
        raise TypeError('train_sequences must be a list or numpy.ndarray')

    if isinstance(train_sequences, np.ndarray):
        # The caller passed an already concatenated sequence.
        if self.estimate_transition_bias:
            # see issue #55: https://github.com/google/uis-rnn/issues/55
            warning_text = (
                'Warning: transition_bias cannot be correctly estimated from a '
                'concatenated sequence; train_sequences will be treated as a '
                'single sequence. This can lead to inaccurate estimation of '
                'transition_bias. Please, consider estimating transition_bias '
                'before concatenating the sequences and passing it as argument.'
            )
            self.logger.print(2, warning_text)
        # Wrap both inputs so the rest of the method only deals with lists.
        train_sequences = [train_sequences]
        train_cluster_ids = [train_cluster_ids]
    # Otherwise train_sequences is a list of un-concatenated sequences; it is
    # concatenated further down, after transition_bias has been estimated.

    if self.estimate_transition_bias:
        new_bias, new_weight = utils.estimate_transition_bias(
            train_cluster_ids)
        if self.transition_bias is None:
            # First estimate: adopt it as is.
            self.transition_bias = new_bias
            self.transition_bias_denominator = new_weight
        else:
            # Merge with the stored estimate as a weighted average, each
            # estimate weighted by its own denominator.
            previous_weight = self.transition_bias_denominator
            weighted_sum = (self.transition_bias * previous_weight +
                            new_bias * new_weight)
            self.transition_bias = weighted_sum / (previous_weight + new_weight)
            self.transition_bias_denominator += new_weight

    # Flatten the list of sequences into one training sequence.
    merged = utils.concatenate_training_data(
        train_sequences,
        train_cluster_ids,
        args.enforce_cluster_id_uniqueness,
        True)
    concatenated_train_sequence, concatenated_train_cluster_id = merged

    self.fit_concatenated(
        concatenated_train_sequence, concatenated_train_cluster_id, args)