def predict(audio, algorithm='SpectralClustering'):
    # Speech Activity Detection

    sad_scores = sad(audio)
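    # binarize raw SAD scores using onset/offset hysteresis thresholds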
    binarize_sad = Binarize(offset=0.52,
                            onset=0.52,
                            log_scale=True,
                            min_duration_off=0.1,
                            min_duration_on=0.1)
    speech = binarize_sad.apply(sad_scores, dimension=1)

    # Speaker Change Detection

    scd_scores = scd(audio)
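    # detect speaker change points as peaks in the SCD scores
    # (threshold alpha, minimum segment duration)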
    peak = Peak(alpha=0.10, min_duration=0.10, log_scale=True)
    partition = peak.apply(scd_scores, dimension=1)

    # Overlapped Speech Detection

    # ovl_scores = ovl(audio)
    # binarize_ovl = Binarize(offset=0.55, onset=0.55, log_scale=True,
    #                         min_duration_off=0.1, min_duration_on=0.1)
    # overlap = binarize_ovl.apply(ovl_scores, dimension=1)

    # Speaker Embedding

    speech_turns = partition.crop(speech)
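    # sliding-window speaker embeddings over the whole file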
    embeddings = emb(audio)

    # keep only speech turns longer than 0.5 second
    long_turns = Timeline(
        segments=[s for s in speech_turns if s.duration > .5])

    return long_turns, sad_scores, scd_scores, embeddings
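The function above relies on module-level `sad`, `scd` and `emb` models and a `Timeline` import. A minimal driver sketch, assuming the pretrained torch.hub models used elsewhere on this page and a hypothetical `demo.wav` input (the `emb_voxceleb` entry point is an assumption), might look like this:

import torch
from pyannote.core import Timeline
from pyannote.audio.utils.signal import Binarize, Peak

sad = torch.hub.load('pyannote/pyannote-audio', 'sad_ami')
scd = torch.hub.load('pyannote/pyannote-audio', 'scd_ami')
emb = torch.hub.load('pyannote/pyannote-audio', 'emb_voxceleb')  # assumed entry point

test_file = {'uri': 'demo', 'audio': 'demo.wav'}
long_turns, sad_scores, scd_scores, embeddings = predict(test_file)

# average the embedding vectors that fall inside each long speech turn
for segment in long_turns:
    vectors = embeddings.crop(segment, mode='loose')
    if len(vectors) > 0:
        print(segment, vectors.mean(axis=0).shape)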
Example #2
import torch
import glob, os
import pickle

from pyannote.audio.utils.signal import Binarize, Peak

# pre-trained speech activity detection and speaker change detection models
sad = torch.hub.load('pyannote/pyannote-audio', 'sad_ami')
scd = torch.hub.load('pyannote/pyannote-audio', 'scd_ami')

os.chdir("/root/SpeechLab2020/Debates/demos/")
for file in glob.glob("*.wav"):
    print(file)
    test_file = {'uri': 'filename', 'audio': file}

    # speech activity detection
    sad_scores = sad(test_file)
    binarize = Binarize(offset=0.52, onset=0.52, log_scale=True,
                        scale='relative', min_duration_off=0.1,
                        min_duration_on=0.1)
    speech = binarize.apply(sad_scores, dimension=1)
    f_out_speech = file.split('.')[0] + "_speech.pkl"
    with open(f_out_speech, "wb") as f:
        pickle.dump(speech, f)

    # speaker change detection
    scd_scores = scd(test_file)
    peak = Peak(alpha=0.10, min_duration=0.10, scale='relative', log_scale=True)
    partition = peak.apply(scd_scores, dimension=1)
    f_out_partition = file.split('.')[0] + "_part.pkl"
    with open(f_out_partition, "wb") as f:
        pickle.dump(partition, f, protocol=2)
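The script above only writes pickles. As a hedged follow-up sketch (not part of the original; the file names below are assumptions based on the naming scheme above), the saved timelines can be reloaded and intersected into speaker-homogeneous speech turns:

import pickle

with open("debate_speech.pkl", "rb") as f:
    speech = pickle.load(f)
with open("debate_part.pkl", "rb") as f:
    partition = pickle.load(f)

# restrict the speaker-change partition to detected speech regions
speech_turns = partition.crop(speech)
for segment in speech_turns:
    print("%.2f --> %.2f" % (segment.start, segment.end))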
Example #3
            # accumulate syllable counts and stretch durations
            # (previous / next stretches) for the current source file
            syl_counts_prev = pd.concat([syl_counts_prev, rates.syl_counts_prev])
            syl_counts_next = pd.concat([syl_counts_next, rates.syl_counts_next])
            prev_stretch_duration = pd.concat([prev_stretch_duration, rates.prev_stretch_duration])
            next_stretch_duration = pd.concat([next_stretch_duration, rates.next_stretch_duration])

        end_time = timeit.default_timer()
        print("Finished calculating speaking rate for File %d/%d" % ((i+1), n_unique_files))
        print("Time: ", end_time - start_time)
        speaking_rates_prev.to_csv(checkpoint_speaking_rate_prev)
        speaking_rates_next.to_csv(checkpoint_speaking_rate_next)
        syl_counts_prev.to_csv(checkpoint_syl_counts_prev)
        syl_counts_next.to_csv(checkpoint_syl_counts_next)
        prev_stretch_duration.to_csv(checkpoint_prev_stretch_duration)
        next_stretch_duration.to_csv(checkpoint_next_stretch_duration)

        with open(checkpoint_file, 'w') as file:
            writer = csv.writer(file)
            writer.writerow([i, source_file])
        
        
if __name__ == '__main__':
    #pip install git+https://github.com/pyannote/pyannote-audio.git@develop
    #pip install pyAudioAnalysis
    
    df_source = pd.read_pickle(DF_SOURCE_PATH)
    df_hom = pd.read_csv(DF_HOMOGRAPHS_PATH, index_col = "Unnamed: 0")
    celex_dict = read_celex_file()
    scd = torch.hub.load('pyannote/pyannote-audio', 'scd_ami')
    peak = Peak(alpha=0.2, min_duration=0.20, log_scale=True)
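    # Hedged sketch (not part of the original fragment): the loaded scd model
    # and peak detector would typically be applied to each source file to get
    # the speaker-change partition; the audio path below is an assumption.
    test_file = {'uri': 'debate', 'audio': '/path/to/debate.wav'}
    scd_scores = scd(test_file)
    partition = peak.apply(scd_scores, dimension=1)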
     
Example #5
class SpeakerChangeDetection(Pipeline):
    """Speaker change detection pipeline

    Parameters
    ----------
    scores : Wrappable, optional
        Describes how raw speaker change detection scores should be obtained.
        See pyannote.audio.features.wrapper.Wrapper documentation for details.
        Defaults to "@scd_scores" that indicates that protocol files provide
        the scores in the "scd_scores" key.
    purity : `float`, optional
        Target segments purity. Defaults to 0.95.
    fscore : bool, optional
        Optimize (precision/recall) fscore. Defaults to optimizing coverage at
        given target `purity`.
    diarization : bool, optional
        Use diarization purity and coverage. Defaults to segmentation purity
        and coverage.

    Hyper-parameters
    ----------------
    alpha : `float`
        Peak detection threshold.
    min_duration : `float`
        Segment minimum duration.
    """

    def __init__(self, scores: Wrappable = None,
                       purity: Optional[float] = 0.95,
                       fscore: bool = False,
                       diarization: bool = False):
        super().__init__()

        if scores is None:
            scores = "@scd_scores"
        self.scores = scores
        self._scores = Wrapper(self.scores)

        self.purity = purity
        self.fscore = fscore
        self.diarization = diarization

        # hyper-parameters: Uniform(a, b) declares the search range explored
        # by the pyannote.pipeline hyper-parameter optimizer
        self.alpha = Uniform(0., 1.)
        self.min_duration = Uniform(0., 10.)

    def initialize(self):
        """Initialize pipeline with current set of parameters"""

        self._peak = Peak(alpha=self.alpha,
                          min_duration=self.min_duration)

    def __call__(self, current_file: dict) -> Annotation:
        """Apply change detection

        Parameters
        ----------
        current_file : `dict`
            File as provided by a pyannote.database protocol.  May contain a
            'scd_scores' key providing precomputed scores.

        Returns
        -------
        segmentation : `pyannote.core.Annotation`
            Segmentation induced by the detected speaker change points.
        """

        scd_scores = self._scores(current_file)

        # if this check has not been done yet, do it once and for all
        if not hasattr(self, "log_scale_"):
            # heuristic to determine whether scores are log-scaled
            if np.nanmean(scd_scores.data) < 0:
                self.log_scale_ = True
            else:
                self.log_scale_ = False

        data = np.exp(scd_scores.data) if self.log_scale_ \
               else scd_scores.data

        # keep only the final dimension, so that binary classification,
        # multi-class classification, and regression scores are all supported
        change_prob = SlidingWindowFeature(
            data[:, -1],
            scd_scores.sliding_window)

        # peak detection
        change = self._peak.apply(change_prob)
        change.uri = current_file.get('uri', None)

        return change.to_annotation(generator='string', modality='audio')

    def get_metric(self, parallel=False) -> Union[DiarizationPurityCoverageFMeasure,
                                                  SegmentationPurityCoverageFMeasure]:
        """Return new instance of f-score metric"""

        if not self.fscore:
            raise NotImplementedError()

        if self.diarization:
            return DiarizationPurityCoverageFMeasure(parallel=parallel)

        return SegmentationPurityCoverageFMeasure(tolerance=0.5,
                                                  parallel=parallel)

    def loss(self, current_file: dict, hypothesis: Annotation) -> float:
        """Compute (1 - coverage) at target purity

        If purity < target, return 1 + (1 - purity)

        Parameters
        ----------
        current_file : `dict`
            File as provided by a pyannote.database protocol.
        hypothesis : `pyannote.core.Annotation`
            Hypothesized segmentation.

        Returns
        -------
        error : `float`
            1. - segment coverage.
        """

        metric = SegmentationPurityCoverageFMeasure(tolerance=0.500, beta=1)
        reference  = current_file['annotation']
        uem = get_annotated(current_file)
        f_measure = metric(reference, hypothesis, uem=uem)
        purity, coverage, _ = metric.compute_metrics()
        if purity > self.purity:
            return 1. - coverage
        else:
            return 1. + (1. - purity)
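
A minimal usage sketch for this pipeline (not from the original source; the hyper-parameter values and the precomputed `scd_scores` feature are assumptions):

pipeline = SpeakerChangeDetection(scores="@scd_scores")
pipeline.instantiate({'alpha': 0.1, 'min_duration': 0.25})

current_file = {
    'uri': 'demo',
    'audio': 'demo.wav',
    'scd_scores': scd_scores,  # precomputed SlidingWindowFeature
}
segmentation = pipeline(current_file)
for segment, _, label in segmentation.itertracks(yield_label=True):
    print(segment, label)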