def init_seg(cep, show='empty', cluster='init'):
    """
    Build a diarization made of a single segment spanning every feature
    frame of *cep*.

    :param cep: feature array (e.g. MFCC); its first dimension gives the
        number of frames
    :param show: show identifier stored in the segment
    :param cluster: cluster label stored in the segment
    :return: a Diar object holding one segment from 0 to ``cep.shape[0]``
    """
    n_frames = cep.shape[0]
    segmentation = Diar()
    segmentation.append(show=show, cluster=cluster, start=0, stop=n_frames)
    return segmentation
# Example #2
# 0
    def decode(self, table):
        """
        Perform a Viterbi decoding of the segments listed in *table*.

        For every segment of *table*, a forward pass adds the best incoming
        score to each row of ``self.observation`` (the array is modified in
        place) and records, for every frame, the index of the best
        predecessor cluster. A backward pass then follows those predecessors
        from the end of the segment towards its start and emits runs of
        consecutive frames that share the same cluster.

        :param table: a Diar object; each row must provide 'start' and 'stop'
        :return: a sorted Diar object whose clusters are taken from
            ``self.cluster_list``
        """

        # print(self.transition_probabilities)
        # print(self.observation)

        # Back-pointer table: path[t, c] is the predecessor index chosen for
        # cluster c at frame t; -1 marks frames not processed yet.
        path = numpy.ones((self.nb_features, self.nb_clusters), 'int32') * -1
        # At frame 0 every cluster points to itself.
        path[0, :] = numpy.arange(self.nb_clusters)
        out_diarization = Diar()

        for row in table:
            start = row['start']
            # Clamp to the last valid frame index.
            stop = min(row['stop'], self.nb_features - 1)
            logging.debug('perform from %d to %d', start, stop)

            # Forward pass (inclusive of stop).
            # NOTE(review): when start is 0, t - 1 is -1 on the first
            # iteration and reads the LAST row of self.observation; it also
            # overwrites path[0, :] set above — confirm this is intended.
            for t in range(start, stop + 1):
                # Broadcast sum: tmp[i, j] = observation[t - 1, j]
                #                            + transition_probabilities[i, j]
                # (assumes transition_probabilities is
                # nb_clusters x nb_clusters — TODO confirm)
                tmp = self.observation[t -
                                       1, :] + self.transition_probabilities
                # Accumulate the best incoming score in place and remember
                # which column produced it.
                self.observation[t, :] += numpy.max(tmp, axis=1)
                path[t, :] = numpy.argmax(tmp, axis=1)

            # Backward pass: seed with the best-scoring cluster at the last
            # frame as a one-frame segment [stop - 1, stop].
            max_pos = numpy.argmax(self.observation[stop, :])
            out_diarization.append(show=self.show,
                                   start=stop - 1,
                                   stop=stop,
                                   cluster=self.cluster_list[max_pos])
            # Walk back from stop - 1 down to start + 1 (start is excluded).
            for t in range(stop - 1, start, -1):
                max_pos = path[t, max_pos]
                cluster = self.cluster_list[max_pos]
                # Same cluster and contiguous with the most recent segment:
                # extend that segment one frame to the left; otherwise open a
                # new one-frame segment [t - 1, t].
                if (out_diarization[-1]['start']
                        == t) and (out_diarization[-1]['cluster'] == cluster):
                    out_diarization[-1]['start'] -= 1
                else:
                    out_diarization.append(show=self.show,
                                           start=t - 1,
                                           stop=t,
                                           cluster=cluster)
        # Segments were appended while walking backwards; sort them before
        # returning.
        out_diarization.sort()
        # self.observation = None
        return out_diarization
def sanity_check(cep, show, cluster='init'):
    """
    Remove runs of identical consecutive frames from *cep* and return the
    remaining regions as a diarization.

    A frame counts as duplicated when every one of its coefficients equals
    the corresponding coefficient of the following frame. The signal is cut
    at each transition between duplicated and distinct frames, and only the
    runs of distinct frames are turned into segments.

    :param cep: 2-D numpy.ndarray of features (frames x coefficients)
    :param show: show identifier stored in each segment
    :param cluster: cluster label stored in each segment
    :return: a Diar object with one segment per run of distinct frames; it is
        empty when every frame is identical to its successor
    """
    table = Diar()

    # same_as_next[i] is True when frame i is identical to frame i + 1.
    # Coefficients are compared one by one: summing the differences first
    # would also flag distinct frames whose differences cancel out.
    same_as_next = np.all(np.diff(cep, axis=0) == 0, axis=1)
    # A change of state between i and i + 1 marks a boundary at index i + 1.
    changes = same_as_next[:-1] ^ same_as_next[1:]
    # Boundary list, framed by the first index and the number of frames.
    idx = [0] + (np.flatnonzero(changes) + 1).tolist() + [cep.shape[0]]
    # Runs alternate between distinct and duplicated frames. Pick the parity
    # of the distinct runs from the first run instead of assuming the signal
    # always starts with distinct frames.
    first = 1 if same_as_next.size and same_as_next[0] else 0
    for i in range(first, len(idx) - 1, 2):
        table.append(show=show, start=idx[i], stop=idx[i + 1], cluster=cluster)

    return table
def div_gauss(cep, show='empty', win=250, shift=0):
    r"""
    Segmentation based on gaussian divergence.

    The segmentation detects the instantaneous change points corresponding to
    segment boundaries. The proposed algorithm is based on the detection of
    local maxima. It detects the change points through a gaussian divergence
    (see equation below), computed using Gaussians with diagonal covariance
    matrices. The left and right gaussians are estimated over a five-second
    window sliding along the whole signal (2.5 seconds for each gaussian,
    given *win* =250 features).
    A change point, i.e. a segment boundary, is present in the middle of the
    window when the gaussian divergence score reaches a local maximum.


        :math:`GD(s_l,s_r)=(\mu_r-\mu_l)^t\Sigma_l^{-1/2}\Sigma_r^{-1/2}(\mu_r-\mu_l)`

    where :math:`s_l` is the left segment modeled by the mean :math:`\mu_l` and
    the diagonal covariance matrix :math:`\Sigma_l`, :math:`s_r` is the right
    segment modeled by the mean :math:`\mu_r` and the diagonal covariance
    matrix :math:`\Sigma_r`.

    When *cep* holds fewer than ``2 * win`` frames no divergence score can be
    computed; a single segment covering the whole input is returned.

    :param cep: numpy array of frames (frames x coefficients)
    :param show: show identifier stored in each segment
    :param win: size of each of the two windows, in number of frames
    :param shift: offset added to the start and stop of every segment
    :return: a diarization object (s4d annotation) whose segments are
        labelled 'S0', 'S1', ... in temporal order
    """

    length = cep.shape[0]

    borders = []
    # Two adjacent windows of win frames are needed to get at least one score.
    if length >= 2 * win:
        # start and stop of the rolling window A (left gaussian)
        start_a = win - 1  # the first win - 1 rolling values are NaN
        stop_a = length - win
        # start and stop of the rolling window B (right gaussian),
        # delayed by win frames with respect to A
        start_b = win + win - 1
        stop_b = length

        # rolling mean and std over win frames, as numpy arrays
        rolling = pd.DataFrame(cep).rolling(window=win, center=False)
        mean = rolling.mean().values
        std = rolling.std().values

        # GD score between windows A and B, summed over the coefficients
        dist = (np.square(mean[start_a:stop_a, :] - mean[start_b:stop_b, :]) / (
            std[start_a:stop_a, :] * std[start_b:stop_b, :])).sum(axis=1)

        # Pad with the first/last score so that the curve has one value per
        # frame. np.pad is the public name; np.lib.pad is not available in
        # NumPy 2.
        dist_pad = np.pad(dist, (win - 1, win), 'constant',
                          constant_values=(dist[0], dist[-1]))

        # local maxima within + or - win frames are the change points
        borders = scipy.signal.argrelmax(dist_pad, order=win)[0].tolist()

    # frame the change points with the first and last indexes
    borders = [0] + borders + [length]

    diarization_out = Diar()
    # one segment, with its own cluster label, per pair of successive borders
    for spk, (start, stop) in enumerate(zip(borders[:-1], borders[1:])):
        diarization_out.append(show=show, start=shift + start,
                               stop=shift + stop, cluster='S' + str(spk))
    return diarization_out