def init_seg(cep, show='empty', cluster='init'):
    """
    Build the trivial segmentation: one single segment spanning all of *cep*.

    :param cep: numpy.ndarray of features (MFCC), one frame per row
    :param show: name of the show stored on the segment
    :param cluster: str, cluster label stored on the segment
    :return: a Diar object holding exactly one segment, starting at 0 and
        stopping at the number of rows of *cep*
    """
    n_frames = cep.shape[0]
    whole_signal = Diar()
    whole_signal.append(show=show, start=0, stop=n_frames, cluster=cluster)
    return whole_signal
def decode(self, table):
    """
    Run a Viterbi decoding over every segment listed in *table*.

    For each segment the scores in ``self.observation`` are accumulated
    frame by frame (the array is modified in place), the best predecessor
    of every cluster is recorded, and the best path is then traced back
    from the last frame of the segment. Consecutive frames assigned to the
    same cluster are merged into one output segment.

    :param table: a Diar object; each row is read through its 'start' and
        'stop' entries
    :return: a Diar object, sorted, holding the decoded segments
    """
    # back_pointers[t, c] holds the predecessor index selected for cluster c
    # at frame t; -1 marks frames that have not been decoded.
    back_pointers = numpy.full((self.nb_features, self.nb_clusters), -1, dtype='int32')
    back_pointers[0, :] = numpy.arange(self.nb_clusters)
    decoded = Diar()

    for segment in table:
        first = segment['start']
        # Clamp the end of the segment to the last available frame.
        last = min(segment['stop'], self.nb_features - 1)
        logging.debug('perform from %d to %d', first, last)

        # Forward pass: accumulate the best incoming score for each cluster.
        # NOTE(review): when first == 0 the index frame - 1 is -1, which
        # (with NumPy indexing) reads the LAST row of self.observation as
        # the "previous" frame - confirm this is intended.
        for frame in range(first, last + 1):
            scores = self.observation[frame - 1, :] + self.transition_probabilities
            self.observation[frame, :] += scores.max(axis=1)
            back_pointers[frame, :] = scores.argmax(axis=1)

        # Backward pass: start from the best scoring cluster on the last frame.
        best = numpy.argmax(self.observation[last, :])
        decoded.append(show=self.show, start=last - 1, stop=last,
                       cluster=self.cluster_list[best])
        for frame in range(last - 1, first, -1):
            best = back_pointers[frame, best]
            label = self.cluster_list[best]
            extends_tail = (decoded[-1]['start'] == frame) and (decoded[-1]['cluster'] == label)
            if not extends_tail:
                # The cluster changes here: open a new one-frame segment.
                decoded.append(show=self.show, start=frame - 1, stop=frame, cluster=label)
                continue
            # Same cluster as the most recent segment: grow it by one frame
            # towards the past.
            decoded[-1]['start'] -= 1

    decoded.sort()
    return decoded
def sanity_check(cep, show, cluster='init'):
    """
    Drop the runs of identical consecutive frames of *cep* and return the
    remaining regions as a diarization.

    A frame is flagged as repeated when every coefficient equals the one of
    the following frame. Region boundaries are placed wherever that flag
    changes, and only the regions made of non-repeated frames are kept.

    :param cep: numpy.ndarray containing MFCC, one frame per row (2-D)
    :param show: name of the show stored on every segment
    :param cluster: str, cluster label stored on every segment
    :return: a Diar object with one segment per kept region; it is empty
        when the whole signal is made of repeated frames
    """
    table = Diar()

    # same_as_next[i] is True when frames i and i + 1 are identical.
    # Every coefficient is compared: summing the differences instead would
    # also flag distinct frames whose differences cancel out.
    same_as_next = np.all(np.diff(cep, axis=0) == 0, axis=1)

    # XOR of neighbouring flags: a True at position i means the flag changes
    # between i and i + 1, i.e. a region boundary sits at index i + 1.
    bits = same_as_next[:-1] ^ same_as_next[1:]

    # Boundaries as frame indexes, enclosed by the start and end of the signal.
    idx = [0] + (np.arange(len(bits))[bits] + 1).tolist() + [cep.shape[0]]

    # Regions alternate between "kept" and "repeated". When the signal opens
    # with repeated frames, region 0 is a repeated one, so start on region 1.
    first = 1 if same_as_next.size > 0 and same_as_next[0] else 0
    for i in range(first, len(idx) - 1, 2):
        table.append(show=show, start=idx[i], stop=idx[i + 1], cluster=cluster)
    return table
def div_gauss(cep, show='empty', win=250, shift=0):
    r"""
    Segmentation based on gaussian divergence.

    The segmentation detects the instantaneous change points corresponding to
    segment boundaries. The proposed algorithm is based on the detection of
    local maxima. It detects the change points through a gaussian divergence
    (see equation below), computed using Gaussians with diagonal covariance
    matrices. The left and right gaussians are estimated over a five-second
    window sliding along the whole signal (2.5 seconds for each gaussian,
    given *win* = 250 features). A change point, i.e. a segment boundary, is
    present in the middle of the window when the gaussian divergence score
    reaches a local maximum.

    :math:`GD(s_l,s_r)=(\mu_r-\mu_l)^t\Sigma_l^{-1/2}\Sigma_r^{-1/2}(\mu_r-\mu_l)`

    where :math:`s_l` is the left segment modeled by the mean :math:`\mu_l`
    and the diagonal covariance matrix :math:`\Sigma_l`, :math:`s_r` is the
    right segment modeled by the mean :math:`\mu_r` and the diagonal
    covariance matrix :math:`\Sigma_r`.

    :param cep: numpy array of frames, one frame per row
    :param show: name of the show stored on every segment
    :param win: size, in number of frames, of each of the two windows
    :param shift: offset added to the start and stop of every segment
    :return: a diarization object (s4d annotation) whose segments are
        labelled 'S0', 'S1', ... in order; a single segment covering the
        whole signal when it is too short to hold two windows
    """
    length = cep.shape[0]

    # Two adjacent windows of `win` frames are needed to compute one score;
    # below that there is nothing to compare and no change point to find.
    if length - 2 * win + 1 > 0:
        # start and stop of the rolling windows A
        start_a = win - 1  # first row where the rolling statistics are defined
        stop_a = length - win
        # start and stop of the rolling windows B (A delayed by `win` frames)
        start_b = win + win - 1
        stop_b = length

        # Rolling mean and std over `win` frames; row i summarises the
        # window ending at frame i, so the first win - 1 rows are NaN.
        rolling = pd.DataFrame(cep).rolling(window=win, center=False)
        mean = rolling.mean().values
        std = rolling.std().values

        # GD score between window A and window B for every position
        dist = (np.square(mean[start_a:stop_a, :] - mean[start_b:stop_b, :]) / (
            std[start_a:stop_a, :] * std[start_b:stop_b, :])).sum(axis=1)

        # Pad with the edge scores so that indexes match the frames of cep.
        # np.pad is the public NumPy entry point for padding.
        dist_pad = np.pad(dist, (win - 1, win), 'constant',
                          constant_values=(dist[0], dist[-1]))

        # Change points: local maxima within + or - win frames
        borders = scipy.signal.argrelmax(dist_pad, order=win)[0].tolist()
    else:
        borders = []

    # Enclose the change points with the first and last frame of the signal
    borders = [0] + borders + [length]

    diarization_out = Diar()
    for spk, (start, stop) in enumerate(zip(borders[:-1], borders[1:])):
        diarization_out.append(show=show, start=shift + start, stop=shift + stop,
                               cluster='S' + str(spk))
    return diarization_out