Exemple #1
0
    def __call__(self, sequence=Stream.NoNewData):
        """Buffer incoming feature sequences and emit aggregated frames.

        Parameters
        ----------
        sequence : feature sequence, `More`, `Stream.NoNewData` or
                   `Stream.EndOfStream`
            New input. A `More` instance is unwrapped to its `output`.

        Returns
        -------
        output : `SlidingWindowFeature`, `Stream.NoNewData` or
                 `Stream.EndOfStream`
            * `Stream.NoNewData` when there is no input;
            * on end of stream, `Stream.EndOfStream` if nothing was ever
              buffered, otherwise the aggregation of the whole buffer;
            * the result of `self.initialize` for the very first sequence;
            * otherwise the aggregation of the buffered frames located
              before the start of the new sequence.
        """

        if isinstance(sequence, More):
            sequence = sequence.output

        # no input ==> no output
        if sequence is Stream.NoNewData:
            return Stream.NoNewData

        if sequence is Stream.EndOfStream:
            if not self.initialized_:
                return Stream.EndOfStream

            # flush whatever is left in the buffer
            self.initialized_ = False
            data = self.agg_func(self.buffer_, axis=0)
            return SlidingWindowFeature(data, self.frames_)

        if not self.initialized_:
            return self.initialize(sequence)

        # check that feature sequence uses the common time base
        sw = sequence.sliding_window
        assert sw.duration == self.frames_.duration
        assert sw.step == self.frames_.step
        assert sw.start > self.frames_.start

        # frames located before the start of the new sequence are aggregated
        # (over the buffer axis) and emitted
        delta_start = sw.start - self.frames_.start
        ready = self.frames_.samples(delta_start, mode='center')
        data = self.agg_func(self.buffer_[:, :ready], axis=0)
        output = SlidingWindowFeature(data, self.frames_)

        # drop emitted frames and move the time base to the new sequence
        self.buffer_ = self.buffer_[:, ready:]
        self.frames_ = SlidingWindow(start=sw.start,
                                     duration=sw.duration,
                                     step=sw.step)

        # remove empty (all NaN) buffers
        # (the last buffer is always kept, even when it is all NaN)
        n_buffers = self.buffer_.shape[0]
        for i in range(n_buffers):
            if np.any(~np.isnan(self.buffer_[i])):
                break
        self.buffer_ = self.buffer_[i:]

        # make room for one more buffer (first axis) and, if needed, for the
        # additional frames brought by the new sequence (second axis)
        n_samples = self.buffer_.shape[1]
        n_new_samples = sequence.data.shape[0]
        pad_width = ((0, 1), (0, max(0, n_new_samples - n_samples)))
        for _ in sequence.data.shape[1:]:
            pad_width += ((0, 0), )
        # `np.nan` is the canonical spelling: the `np.NAN` alias was removed
        # in NumPy 2.0
        self.buffer_ = np.pad(self.buffer_,
                              pad_width,
                              'constant',
                              constant_values=np.nan)
        self.buffer_[-1] = sequence.data

        return output
Exemple #2
0
    def __init__(self, sample_rate=None, mono=True, augmentation=None):
        """Store audio loading options.

        Parameters
        ----------
        sample_rate : number, optional
            When provided, a sample-level sliding window (one window per
            sample, centered on the sample) is stored in `sliding_window_`.
        mono : bool, optional
            Stored as is. Defaults to True.
        augmentation : optional
            Stored as is.
        """

        super().__init__()

        self.sample_rate = sample_rate
        self.mono = mono
        self.augmentation = augmentation

        # without a sample rate, no time base can be defined
        if sample_rate is None:
            return

        period = 1. / sample_rate
        self.sliding_window_ = SlidingWindow(start=-.5 / sample_rate,
                                             duration=period,
                                             step=period)
Exemple #3
0
    def crop(self, current_file, segment, mode='center', fixed=None) -> np.ndarray:
        """Extract features for a given segment only

        Meant as a fast alternative to `self(current_file).crop(segment, ...)`:
        only the waveform around `segment` (extended by the context duration
        on both sides) is read and processed.

        Parameters
        ----------
        current_file : dict
            `pyannote.database` file. Must contain a 'duration' key that
            provides the duration (in seconds) of the audio file.
        segment : `pyannote.core.Segment`
            Segment from which to extract features.
        mode : str, optional
            Forwarded to `SlidingWindow.crop` when selecting the frames that
            correspond to `segment`. Defaults to 'center'.
        fixed : float, optional
            Forwarded to `SlidingWindow.crop` when selecting the frames that
            correspond to `segment`.

        Returns
        -------
        features : (n_frames, dimension) numpy array
            Extracted features

        See also
        --------
        `pyannote.core.SlidingWindowFeature.crop`
        """

        context = self.get_context_duration()

        # extend segment on both sides with requested context
        # (clipped to the [0, file duration] range)
        xsegment = Segment(
            max(0, segment.start - context),
            min(current_file['duration'], segment.end + context))

        # obtain (augmented) waveform on this extended segment
        y = self.raw_audio_.crop(current_file, xsegment, mode='center',
                                 fixed=xsegment.duration)

        features = self.get_features(y, self.sample_rate)

        # get rid of additional context before returning:
        # frames are re-anchored relative to the extended segment, then the
        # range of frames matching the requested segment is selected
        frames = self.sliding_window
        shifted_frames = SlidingWindow(start=xsegment.start - frames.step,
                                       step=frames.step,
                                       duration=frames.duration)
        (start, end), = shifted_frames.crop(segment, mode=mode, fixed=fixed,
                                            return_ranges=True)

        # HACK for when start (returned by shifted_frames.crop) is negative
        # due to floating point precision.
        if start < 0:
            # when a fixed duration is requested, shift `end` by the same
            # amount so that the number of returned frames is unchanged
            if fixed is not None:
                end -= start
            start = 0

        return features[start:end]
Exemple #4
0
    def initialize(self, sequence):
        """Start buffering with the very first feature sequence.

        Sets the common time base (`frames_`) from the sequence sliding
        window and creates a buffer whose first (and only) row is the
        sequence data.

        Returns
        -------
        `Stream.NoNewData`
        """

        # common time base, copied from the first sequence
        first_window = sequence.sliding_window
        self.frames_ = SlidingWindow(start=first_window.start,
                                     duration=first_window.duration,
                                     step=first_window.step)

        # buffer has one leading axis indexing buffered sequences
        features = sequence.data
        stacked = np.ones((1, ) + features.shape, dtype=features.dtype)
        stacked[0, :] = features
        self.buffer_ = stacked

        self.initialized_ = True

        return Stream.NoNewData
    def apply(self, wav):
        """Compute divergence between left and right sliding windows

        A diagonal-covariance Gaussian is fitted on each side of every
        (left, right) pair and the divergence between both is reported.

        Parameter
        ---------
        wav : str
            Path to wav audio file

        Returns
        -------
        predictions : SlidingWindowFeature
        """

        from pyannote.algorithms.stats.gaussian import Gaussian

        def divergence(x_left, x_right):
            # one diagonal Gaussian per side
            g_left = Gaussian(covariance_type='diag').fit(x_left)
            g_right = Gaussian(covariance_type='diag').fit(x_right)
            return g_left.divergence(g_right)

        current_file = {'uri': wav, 'medium': {'wav': wav}}
        _, left, right = next(self.from_file(current_file))

        y = np.array([divergence(x_left, x_right)
                      for x_left, x_right in zip(left, right)])

        window = SlidingWindow(duration=2 * self.duration,
                               step=self.step,
                               start=0.)
        return SlidingWindowFeature(y, window)
Exemple #6
0
    def initialize(self, sequence):
        """Set up internal state from the very first feature sequence.

        Stores the frame time base (`frames_`), a copy of the data
        (`buffer_`), the output sliding window (`window_`) together with its
        first position (`current_window_`), and the number of frames covered
        by `self.duration` (`n_samples_`).
        """

        # common time base
        origin = sequence.sliding_window
        self.frames_ = SlidingWindow(start=origin.start,
                                     duration=origin.duration,
                                     step=origin.step)

        self.buffer_ = np.array(sequence.data)

        # output windows start where the first sequence starts
        self.window_ = SlidingWindow(start=origin.start,
                                     duration=self.duration,
                                     step=self.step)
        self.current_window_ = next(self.window_)

        self.n_samples_ = self.frames_.samples(self.duration, mode='center')

        self.initialized_ = True
Exemple #7
0
def speaker_spotting_try(current_trial):
    """Score one speaker spotting trial.

    Every precomputed embedding is compared (cosine distance) to the target
    model; embeddings outside of the speech regions found in `SAD` keep a
    score of zero.

    Parameters
    ----------
    current_trial : dict
        Must provide 'model_id', 'try_with' and 'uri' keys.

    Returns
    -------
    scores : SlidingWindowFeature
        One score per embedding fully included in 'try_with'.
    """

    # target model and where to look for it
    target = models[current_trial['model_id']]
    try_with = current_trial['try_with']

    # precomputed embedding
    embeddings = precomputed(current_trial)
    frames = embeddings.sliding_window

    # first and last embedding fully included in 'try_with'
    included = frames.crop(try_with, mode='strict')
    first, last = included[0], included[-1]

    # embeddings whose center falls within speech regions
    speech = frames.crop(SAD[current_trial['uri']], mode='center')

    # compare all embeddings to target model
    distance = cdist(embeddings.data, target, metric='cosine')
    data = 2. - np.mean(distance, axis=1, keepdims=True)

    # only speech embeddings (that actually exist) get a non-zero score
    score = np.zeros((len(embeddings.data) + 2, 1))
    n_scored = len(data)
    speech = [index for index in speech if index < n_scored]
    score[speech] = data[speech]
    score = score[first:last + 1]

    sliding_window = SlidingWindow(start=frames[first].start,
                                   duration=frames.duration,
                                   step=frames.step)

    return SlidingWindowFeature(score, sliding_window)
Exemple #8
0
def speaker_spotting_try(current_trial):
    """Score one speaker spotting trial using reference speech regions.

    Every precomputed embedding is compared (cosine distance) to the target
    model; only embeddings strictly included in the reference speech regions
    of 'try_with' get a non-zero score.

    Parameters
    ----------
    current_trial : dict
        Must provide 'model_id', 'try_with' and 'uri' keys.

    Returns
    -------
    scores : SlidingWindowFeature
        One score per embedding fully included in 'try_with'.
    """

    # target model and where to look for it
    target = models[current_trial['model_id']]
    try_with = current_trial['try_with']

    # precomputed embedding
    embeddings = precomputed(current_trial)
    frames = embeddings.sliding_window

    # first and last embedding fully included in 'try_with'
    included = frames.crop(try_with, mode='strict')
    first, last = included[0], included[-1]

    # embeddings fully included in reference speech regions
    reference = REFERENCE[current_trial['uri']]
    speech_timeline = reference.crop(
        current_trial['try_with']).get_timeline().support()
    speech = frames.crop(speech_timeline, mode='strict')

    # compare all embeddings to target model
    scores = 2. - cdist(embeddings.data, target, metric='cosine')

    data = np.zeros((len(embeddings.data), 1))
    for i, (_window, _features) in enumerate(embeddings):
        # skip embeddings before 'try_with' or outside of speech regions
        scorable = i >= first and i in speech
        if not scorable:
            continue
        # nothing left to score past 'try_with'
        if i > last:
            break
        data[i] = scores[i]

    data = data[first:last + 1]

    sliding_window = SlidingWindow(start=frames[first].start,
                                   duration=frames.duration,
                                   step=frames.step)

    return SlidingWindowFeature(data, sliding_window)
def time2index(
    constraints_time: List[Tuple[Time, Time]], window: SlidingWindow,
) -> List[Tuple[int, int]]:
    """Map pairs of timestamps to pairs of frame indices

    Each timestamp is replaced by the index of its closest frame in `window`.
    Pairs whose two timestamps map to the same frame are dropped.

    Parameters
    ----------
    constraints_time : list of (float, float)
        Time-based constraints
    window : SlidingWindow
        Window used for embedding extraction

    Returns
    -------
    constraints : list of (int, int)
        Index-based constraints
    """

    frame_pairs = (
        (window.closest_frame(t1), window.closest_frame(t2))
        for t1, t2 in constraints_time
    )
    return [pair for pair in frame_pairs if pair[0] != pair[1]]
Exemple #10
0
    def apply(self, current_file):
        """Compute BIC between left and right sliding windows

        A Gaussian (with `self.covariance_type` covariance) is fitted on each
        side of every (left, right) pair, and the first value returned by
        their `bic` comparison (with a zero penalty coefficient) is reported.

        Parameter
        ---------
        current_file : dict

        Returns
        -------
        predictions : SlidingWindowFeature
        """

        from pyannote.algorithms.stats.gaussian import Gaussian

        def bic(x_left, x_right):
            # one Gaussian per side
            g_left = Gaussian(covariance_type=self.covariance_type).fit(x_left)
            g_right = Gaussian(covariance_type=self.covariance_type).fit(x_right)
            return g_left.bic(g_right, penalty_coef=0)[0]

        _, left, right = next(self.from_file(current_file))

        y = np.array([bic(x_left, x_right)
                      for x_left, x_right in zip(left, right)])

        window = SlidingWindow(duration=2 * self.duration,
                               step=self.step,
                               start=0.)
        return SlidingWindowFeature(y, window)
def chunks(duration: float,
           chunk: float = 30,
           shuffle: bool = False) -> Iterator[Segment]:
    """Partition [0, duration] time range into smaller chunks

    All chunks are `chunk` seconds long, except possibly the last one which
    covers whatever remains. When `duration` is shorter than `chunk`, a
    single [0, duration] chunk is yielded.

    Parameters
    ----------
    duration : float
        Total duration, in seconds.
    chunk : float, optional
        Chunk duration, in seconds. Defaults to 30.
    shuffle : bool, optional
        Yield chunks in random order. Defaults to chronological order.

    Yields
    ------
    focus : Segment
    """

    if shuffle:
        # build the chronological partition first, then shuffle it
        chunks_ = list(chunks(duration, chunk=chunk, shuffle=False))
        random.shuffle(chunks_)
        yield from chunks_
        return

    sliding_window = SlidingWindow(start=0.0, step=chunk, duration=chunk)
    whole = Segment(0, duration)

    # `end` tracks how far the full-length chunks go. It is initialized
    # before the loop so that the remainder is still yielded (instead of
    # failing on an unbound loop variable) when no full chunk fits.
    end = 0.0
    for window in sliding_window(whole):
        yield window
        end = window.end

    # remaining (shorter) chunk, if any
    if end < duration:
        yield Segment(end, duration)
    def get_resolution(
        task: Task,
        sample_rate: int = 16000,
        out_channels: List[int] = [512, 512, 512, 512, 512, 512],
        kernel_size: List[int] = [251, 5, 5, 5, 5, 5],
        stride: List[int] = [5, 1, 1, 1, 1, 1],
        max_pool: List[int] = [3, 3, 3, 3, 3, 3],
        **kwargs,
    ) -> SlidingWindow:
        """Output frame resolution

        Accumulates receptive field and jump (in samples) over each
        (convolution, max-pooling) pair, then converts them to seconds.

        Parameters
        ----------
        task : Task
            Not used by this computation.
        sample_rate : int, optional
            Audio sample rate. Defaults to 16000.
        out_channels : list of int, optional
            Not used by this computation.
        kernel_size : list of int, optional
            Kernel size of each convolution layer.
        stride : list of int, optional
            Stride of each convolution layer.
        max_pool : list of int, optional
            Kernel size (and stride) of each max-pooling layer.

        Returns
        -------
        resolution : SlidingWindow
            Window whose duration is the receptive field, whose step is the
            jump between consecutive output frames, and whose start is 0.
        """

        # https://medium.com/mlreview/a-guide-to-receptive-field-arithmetic-for-convolutional-neural-networks-e0f514068807
        # Only receptive field and jump are tracked: the returned window
        # always starts at 0.0, so the center offset is not needed.
        receptive_field, jump = 1, 1
        for ks, s, mp in zip(kernel_size, stride, max_pool):
            # increase due to (Sinc)Conv1d
            receptive_field += (ks - 1) * jump
            jump *= s
            # increase in receptive field due to MaxPool1d
            receptive_field += (mp - 1) * jump
            jump *= mp

        return SlidingWindow(duration=receptive_field / sample_rate,
                             step=jump / sample_rate,
                             start=0.0)
Exemple #13
0
    def __init__(self, root_dir=None, duration=0.025, step=None):
        """Set up access to precomputed HTK features.

        Feature dimension and frame step are read from one HTK file found
        directly under a sub-directory of `root_dir`.

        Parameters
        ----------
        root_dir : str, optional
            Directory containing `<sub-directory>/<file>.htk` files.
        duration : float, optional
            Frame duration, in seconds. Defaults to 0.025.
        step : float, optional
            Frame step, in seconds. When provided, it overrides the one read
            from the HTK header.

        Raises
        ------
        ValueError
            When no HTK file can be found.
        """
        super(PrecomputedHTK, self).__init__()
        self.root_dir = root_dir
        self.duration = duration

        # load any htk file in root_dir/database
        # FIXME switch to Py3.5 and use glob 'recursive' parameter
        # http://stackoverflow.com/questions/2186525/
        # use-a-glob-to-find-files-recursively-in-python
        pattern = '{root_dir}/*/*.htk'.format(root_dir=root_dir)
        candidates = glob(pattern)
        if not candidates:
            msg = "Could not find any HTK file in '{root_dir}'."
            raise ValueError(msg.format(root_dir=root_dir))

        X, sample_period = self.load_htk(candidates[0])
        self.dimension_ = X.shape[1]

        # don't trust HTK header when 'step' is provided by the user.
        # HACK remove this when Pepe's HTK files are fixed...
        header_step = sample_period * 1e-7
        self.step = header_step if step is None else step

        self.sliding_window_ = SlidingWindow(start=0.,
                                             duration=self.duration,
                                             step=self.step)
Exemple #14
0
    def __call__(self, item):
        """Load one channel of an audio file as a sample-level feature.

        Parameters
        ----------
        item : dict
            Must provide the path to the audio file under the 'wav' key.
            May provide a (1-indexed) 'channel' key, which defaults to 1.

        Returns
        -------
        waveform : SlidingWindowFeature
            Selected channel, with one window per sample.

        Raises
        ------
        PyannoteFeatureExtractionError
            When the file cannot be read or when it contains NaNs.
        """

        try:
            wav = item['wav']
            y, sample_rate, encoding = pysndfile.sndio.read(wav)
        except IOError as e:
            # exceptions have no `message` attribute in Python 3:
            # `str(e)` is the portable way to get the error description
            raise PyannoteFeatureExtractionError(str(e))

        if np.any(np.isnan(y)):
            uri = get_unique_identifier(item)
            msg = 'pysndfile output contains NaNs for file "{uri}".'
            raise PyannoteFeatureExtractionError(msg.format(uri=uri))

        # reshape before selecting channel
        # (so that mono files also have a channel axis)
        if len(y.shape) < 2:
            y = y.reshape(-1, 1)

        # channels are 1-indexed
        channel = item.get('channel', 1)
        y = y[:, channel - 1]

        sliding_window = SlidingWindow(start=0.,
                                       duration=1. / sample_rate,
                                       step=1. / sample_rate)

        return SlidingWindowFeature(y, sliding_window)
Exemple #15
0
    def iter_segments(self, from_annotation):
        """Yield labeled (sub-)segments from the tracks of an annotation.

        Parameters
        ----------
        from_annotation : annotation
            Must provide `itertracks(label=True)`.

        Yields
        ------
        (segment, label) : tuple
            * tracks shorter than `self.min_duration` are skipped;
            * tracks shorter than `self.duration` are yielded as is, but only
              when variable length segments are allowed;
            * longer tracks are covered by sliding windows of
              `self.duration`; when variable length segments are allowed, the
              first window that sticks out of the track is cropped to it,
              yielded if long enough, and ends the iteration on this track.
        """

        for segment, _, label in from_annotation.itertracks(label=True):

            length = segment.duration

            # skip segments that are too short
            if length < self.min_duration:
                continue

            # yield segments shorter than duration
            # when variable length segments are allowed
            if length < self.duration:
                if self.variable_length_:
                    yield (segment, label)
                continue

            # yield sliding segments within current track
            window = SlidingWindow(
                duration=self.duration, step=self.step,
                start=segment.start, end=segment.end)

            for s in window:

                # current window is fully contained by segment
                if s in segment:
                    yield (s, label)
                    continue

                # it is not, but variable length segments are allowed
                if self.variable_length_:
                    candidate = s & segment
                    if candidate.duration >= self.min_duration:
                        yield (candidate, label)
                    break
Exemple #16
0
    def apply(self, wav):
        """Apply sequence labeling to a whole file

        Predictions of overlapping sequences are averaged frame by frame.

        Parameter
        ---------
        wav : str
            Path to wav audio file

        Returns
        -------
        predictions : SlidingWindowFeature
            Frame-level (averaged) predictions, on the time base of the
            feature extractor sliding window.
        """

        # apply sequence labeling to the whole file
        current_file = {'uri': wav, 'medium': {'wav': wav}}
        predictions = next(self.from_file(current_file))
        n_sequences, _, n_classes = predictions.shape

        # estimate total number of frames (over the duration of the whole file)
        # based on feature extractor internal sliding window and file duration
        samples_window = self.feature_extractor.sliding_window()
        n_samples = samples_window.samples(get_wav_duration(wav)) + 3

        # +3 is a hack to avoid later IndexError resulting from rounding error
        # when cropping samples_window

        # k[i] contains the number of sequences that overlap with frame #i
        # (int32 rather than int8, which silently wraps around as soon as
        # more than 127 sequences overlap the same frame)
        k = np.zeros((n_samples, ), dtype=np.int32)

        # y[i] contains the sum of predictions for frame #i
        # over all overlapping samples
        y = np.zeros((n_samples, n_classes), dtype=np.float32)

        # sequence sliding window
        sequence_window = SlidingWindow(duration=self.duration, step=self.step)

        # accumulate predictions over all sequences
        for i in range(n_sequences):

            # position of sequence #i
            window = sequence_window[i]

            # indices of frames overlapped by sequence #i
            indices = samples_window.crop(window,
                                          mode='center',
                                          fixed=self.duration)

            # accumulate predictions
            # TODO - use smarter weights (e.g. Hamming window)
            k[indices] += 1
            y[indices] += predictions[i, :, :]

        # average prediction
        # (frames with no overlapping sequence are divided by 1; the counter
        # is cast to the dtype of `y` so that the result remains float32)
        y = (y.T / np.maximum(k, 1).astype(y.dtype)).T

        # returns the whole thing as SlidingWindowFeature
        return SlidingWindowFeature(y, samples_window)
Exemple #17
0
    def iter_segments(self, source):
        """Yield running segments within `source`.

        Parameters
        ----------
        source : float, Segment, Timeline or Annotation
            If `float`, yield running segments within [0, source).
            If `Segment`, yield running segments within this segment.
            If `Timeline`, yield running segments within this timeline.
            If `Annotation`, yield running segments within its timeline.

        Yields
        ------
        segment : Segment
            * segments shorter than `self.min_duration` are skipped;
            * segments shorter than `self.duration` are yielded as is, but
              only when variable length segments are allowed;
            * longer segments are covered by sliding windows of
              `self.duration`; when variable length segments are allowed, the
              first window that sticks out is replaced by a last window
              aligned with the end of the segment.

        Raises
        ------
        ValueError
            When `source` is a number and `self.duration` is not strictly
            positive.
        TypeError
            When `source` has an unsupported type.
        """

        if isinstance(source, Annotation):
            segments = source.get_timeline()

        elif isinstance(source, Timeline):
            segments = source

        elif isinstance(source, Segment):
            segments = [source]

        elif isinstance(source, (int, float)):
            if not self.duration > 0:
                raise ValueError('Duration must be strictly positive.')
            segments = [Segment(0, source)]

        else:
            raise TypeError(
                'source must be float, Segment, Timeline or Annotation')

        for segment in segments:

            length = segment.duration

            # skip segments that are too short
            if length < self.min_duration:
                continue

            # yield segments shorter than duration
            # when variable length segments are allowed
            if length < self.duration:
                if self.variable_length_:
                    yield segment
                continue

            # yield sliding segments within current track
            window = SlidingWindow(
                duration=self.duration, step=self.step,
                start=segment.start, end=segment.end)

            for s in window:

                # current window is fully contained by segment
                if s in segment:
                    yield s
                    continue

                # it is not, but variable length segments are allowed:
                # finish with a window aligned with the end of the segment
                if self.variable_length_:
                    yield Segment(start=segment.end - self.duration,
                                  end=segment.end)
                    break
    def __call__(self, current_file, return_sr=False):
        """Obtain waveform

        The waveform is taken from the "waveform" key when available, and
        read from the "audio" key otherwise. When a "channel" key (1-indexed)
        is provided, only this channel is kept.

        Parameters
        ----------
        current_file : dict
            `pyannote.database` files.
        return_sr : `bool`, optional
            Return sample rate. Defaults to False

        Returns
        -------
        waveform : `pyannote.core.SlidingWindowFeature`
            Waveform
        sample_rate : `int`
            Only when `return_sr` is set to True

        Raises
        ------
        ValueError
            When a precomputed waveform is provided but `self.sample_rate`
            is None, or when this waveform is not two-dimensional.
        """

        if "waveform" in current_file:

            # precomputed waveform: its sample rate cannot be guessed
            sample_rate = self.sample_rate
            if sample_rate is None:
                msg = ("`RawAudio` needs to be instantiated with an actual "
                       "`sample_rate` if one wants to use precomputed "
                       "waveform.")
                raise ValueError(msg)

            y = current_file["waveform"]

            if len(y.shape) != 2:
                msg = ("Precomputed waveform should be provided as a "
                       "(n_samples, n_channels) `np.ndarray`.")
                raise ValueError(msg)

        else:
            y, sample_rate = sf.read(current_file["audio"],
                                     dtype="float32",
                                     always_2d=True)

        # extract specific channel if requested (keeping the channel axis)
        channel = current_file.get("channel", None)
        if channel is not None:
            y = y[:, channel - 1:channel]

        y = self.get_features(y, sample_rate)

        # one window per sample, centered on the sample
        # NOTE(review): the time base uses the rate the samples were obtained
        # at, while the rate reported below is `self.sample_rate` when set --
        # confirm both stay consistent with what `get_features` returns.
        period = 1.0 / sample_rate
        sliding_window = SlidingWindow(start=-0.5 / sample_rate,
                                       duration=period,
                                       step=period)
        waveform = SlidingWindowFeature(y, sliding_window)

        if not return_sr:
            return waveform

        reported_rate = (
            sample_rate if self.sample_rate is None else self.sample_rate)
        return (waveform, reported_rate)
def speaker_spotting_try_system4(current_trial):
    """Score one speaker spotting trial with online clustering.

    Embeddings are fed one at a time to an online clustering object; after
    each update, the score is derived from the smallest distance between the
    target model and the current clusters.

    Parameters
    ----------
    current_trial : dict
        Must provide 'model_id', 'try_with' and 'uri' keys.

    Returns
    -------
    scores : SlidingWindowFeature
        One score per embedding fully included in 'try_with'. Embeddings
        outside of reference speech regions keep a score of zero.
    """

    # target model
    model = {}
    model_id = current_trial['model_id']
    model_embedding = models[current_trial['model_id']]
    model['mid'] = model_id
    model['embedding'] = model_embedding
    # where to look for this target
    try_with = current_trial['try_with']

    # precomputed embedding
    embeddings = precomputed(current_trial)

    # find index of first and last embedding fully included in 'try_with'
    indices = embeddings.sliding_window.crop(try_with, mode='strict')
    # indices of embeddings fully included in reference speech regions
    speech_timeline = REFERENCE[current_trial['uri']].crop(
        current_trial['try_with']).get_timeline().support()
    indices_speech = embeddings.sliding_window.crop(speech_timeline,
                                                    mode='strict')
    first, last = indices[0], indices[-1]
    # clustering is given the pairwise cosine distances between embeddings
    onlineClustering = clustering.OnlineClustering(
        current_trial['uri'],
        cdist(embeddings.data, embeddings.data, metric='cosine'))
    # `start` is the end of the previously visited window
    start = embeddings.sliding_window[0].start
    data = np.zeros((len(embeddings.data), 1))
    for i, (window, _) in enumerate(embeddings):
        # skip embeddings before 'try_with' or outside of speech regions
        if i < first or (i not in indices_speech):
            start = window.end
            continue
        # nothing left to score past 'try_with'
        if i > last:
            break
        # part of the current window not covered by the previous one
        so_far = Segment(start, window.end)
        score = 0.
        example = {}
        example['segment'] = so_far
        example['embedding'] = embeddings.crop(so_far, mode='center')
        example['indice'] = [i]
        example['distances'] = {}
        # distances between the new embedding(s) and the target model
        example['distances'][model['mid']] = list(
            cdist(example['embedding'], model['embedding'],
                  metric='cosine').flatten())

        onlineClustering.upadateCluster2(example)
        if not onlineClustering.empty():
            #min_dist = min(onlineClustering.computeDistances({'embedding': model}))
            # the closest cluster drives the score
            min_dist = min(onlineClustering.modelClusterDistance(model))
            score = max(score, 2 - min_dist)
        data[i] = score
        start = window.end
    # only keep scores of embeddings fully included in 'try_with'
    data = data[first:last + 1]
    sliding_window = SlidingWindow(
        start=embeddings.sliding_window[first].start,
        duration=embeddings.sliding_window.duration,
        step=embeddings.sliding_window.step)

    return SlidingWindowFeature(data, sliding_window)
Exemple #20
0
    def apply(self, current_file):
        """Compute predictions on a sliding window

        Outputs of overlapping sub-sequences are averaged frame by frame.

        Parameter
        ---------
        current_file : dict

        Returns
        -------
        predictions : SlidingWindowFeature
            Frame-level (averaged) predictions. Empty (zero frame) when
            `self.from_file` does not yield any batch.
        """

        # frame and sub-sequence sliding windows
        frames = self.feature_extraction.sliding_window()

        batches = [
            batch for batch in self.from_file(current_file, incomplete=True)
        ]
        if not batches:
            data = np.zeros((0, self.dimension), dtype=np.float32)
            return SlidingWindowFeature(data, frames)

        fX = np.vstack(batches)

        subsequences = SlidingWindow(duration=self.duration, step=self.step)

        # get total number of frames
        if isinstance(self.feature_extraction, Precomputed):
            n_frames, _ = self.feature_extraction.shape(current_file)
        else:
            uri = get_unique_identifier(current_file)
            # unpack the *shape* of the preprocessed features
            # (not the feature array itself)
            n_frames, _ = self.preprocessed_[uri].data.shape

        # data[i] is the sum of all predictions for frame #i
        data = np.zeros((n_frames, self.dimension), dtype=np.float32)

        # k[i] is the number of sequences that overlap with frame #i
        # (int32 rather than int8, which silently wraps around as soon as
        # more than 127 sequences overlap the same frame)
        k = np.zeros((n_frames, 1), dtype=np.int32)

        for subsequence, fX_ in zip(subsequences, fX):

            # indices of frames overlapped by subsequence
            indices = frames.crop(subsequence,
                                  mode='center',
                                  fixed=self.duration)

            # accumulate the outputs
            data[indices] += fX_

            # keep track of the number of overlapping sequence
            # TODO - use smarter weights (e.g. Hamming window)
            k[indices] += 1

        # compute average embedding of each frame
        # (frames with no overlapping sequence are divided by 1; the counter
        # is cast to the dtype of `data` so that the result remains float32)
        data = data / np.maximum(k, 1).astype(data.dtype)

        return SlidingWindowFeature(data, frames)
    def __init__(self, root_dir=None):
        """Store `root_dir` and a fixed sliding window.

        The sliding window starts at 0, is 2.5 long and moves by steps of 2.
        """
        super(Precomputed, self).__init__()
        self.root_dir = root_dir

        # hard-coded time base
        self.sliding_window_ = SlidingWindow(start=0, duration=2.5, step=2)
Exemple #22
0
    def __init__(self,
                 root_dir=None,
                 use_memmap=True,
                 sliding_window=None,
                 dimension=None):
        """Set up access to (or creation of) precomputed features.

        When `root_dir` already contains a 'metadata.yml' file, sliding
        window and dimension are read from it and checked against the
        provided ones (if any). Otherwise, the file is created from the
        provided `sliding_window` and `dimension`.

        Parameters
        ----------
        root_dir : path-like
            Directory where features (and 'metadata.yml') are stored.
        use_memmap : bool, optional
            Stored as is. Defaults to True.
        sliding_window : SlidingWindow, optional
            Feature sliding window. Mandatory when 'metadata.yml' does not
            exist yet.
        dimension : int, optional
            Feature dimension. Mandatory when 'metadata.yml' does not exist
            yet.

        Raises
        ------
        ValueError
            When provided `dimension` or `sliding_window` are inconsistent
            with existing metadata, or when metadata do not exist and one of
            them is missing.
        """

        super(Precomputed, self).__init__()
        self.root_dir = Path(root_dir).expanduser().resolve(strict=False)
        self.use_memmap = use_memmap

        path = self.root_dir / 'metadata.yml'
        if path.exists():

            with io.open(path, 'r') as f:
                # NOTE(review): `yaml.load` without an explicit `Loader` can
                # build arbitrary Python objects and is rejected by recent
                # PyYAML versions. Switching to a safe loader should be
                # considered once it is confirmed that existing metadata
                # files only contain plain scalars.
                params = yaml.load(f)

            self.dimension_ = params.pop('dimension')
            # remaining parameters describe the sliding window
            self.sliding_window_ = SlidingWindow(**params)

            if dimension is not None and self.dimension_ != dimension:
                msg = 'inconsistent "dimension" (is: {0}, should be: {1})'
                # report the dimension found on disk (`dimension_`)
                raise ValueError(msg.format(dimension, self.dimension_))

            if ((sliding_window is not None) and
                ((sliding_window.start != self.sliding_window_.start) or
                 (sliding_window.duration != self.sliding_window_.duration) or
                 (sliding_window.step != self.sliding_window_.step))):
                msg = 'inconsistent "sliding_window"'
                raise ValueError(msg)

        else:

            if sliding_window is None or dimension is None:
                msg = (
                    f'Either directory {self.root_dir} does not exist or it '
                    f'does not contain precomputed features. In case it exists '
                    f'and this was done on purpose, please provide both '
                    f'`sliding_window` and `dimension` parameters when '
                    f'instantianting `Precomputed`.')
                raise ValueError(msg)

            # create parent directory
            mkdir_p(path.parent)

            params = {
                'start': sliding_window.start,
                'duration': sliding_window.duration,
                'step': sliding_window.step,
                'dimension': dimension
            }

            with io.open(path, 'w') as f:
                yaml.dump(params, f, default_flow_style=False)

            self.sliding_window_ = sliding_window
            self.dimension_ = dimension
Exemple #23
0
    def initialize(self, sequence):
        """Set up time base and buffer from the very first feature sequence.

        `frames_` gets its own copy of the sequence sliding window and
        `buffer_` a copy of the sequence data.
        """

        # common time base
        reference = sequence.sliding_window
        self.frames_ = SlidingWindow(duration=reference.duration,
                                     step=reference.step,
                                     start=reference.start)

        self.buffer_ = np.array(sequence.data)

        self.initialized_ = True
Exemple #24
0
    def __call__(self, current_file):
        """Load audio as a sample-level `SlidingWindowFeature`.

        Audio is read with `read_audio`, using `self.sample_rate` and
        `self.mono`. The returned feature has one window per sample,
        starting at 0.
        """

        y, sample_rate = read_audio(current_file,
                                    sample_rate=self.sample_rate,
                                    mono=self.mono)

        # one window per sample
        period = 1. / sample_rate
        sample_window = SlidingWindow(start=0., duration=period, step=period)

        return SlidingWindowFeature(y, sample_window)
Exemple #25
0
    def __call__(self, sequence=Stream.NoNewData):
        """Buffer incoming feature sequences and emit aggregated frames.

        Parameters
        ----------
        sequence : feature sequence, `More`, `Stream.NoNewData` or
                   `Stream.EndOfStream`
            New input. A `More` instance is unwrapped to its `output`.

        Returns
        -------
        output : `SlidingWindowFeature`, `Stream.NoNewData` or
                 `Stream.EndOfStream`
            * `Stream.NoNewData` when there is no input;
            * on end of stream, `Stream.EndOfStream` if nothing was ever
              buffered, otherwise the aggregation of the whole buffer;
            * the result of `self.initialize` for the very first sequence;
            * otherwise the aggregation of the buffered frames located
              before the start of the new sequence.
        """

        if isinstance(sequence, More):
            sequence = sequence.output

        # no input ==> no output
        if sequence is Stream.NoNewData:
            return Stream.NoNewData

        if sequence is Stream.EndOfStream:
            if not self.initialized_:
                return Stream.EndOfStream

            # flush whatever is left in the buffer
            self.initialized_ = False
            data = self.agg_func(self.buffer_, axis=0)
            return SlidingWindowFeature(data, self.frames_)

        if not self.initialized_:
            return self.initialize(sequence)

        # check that feature sequence uses the common time base
        sw = sequence.sliding_window
        assert sw.duration == self.frames_.duration
        assert sw.step == self.frames_.step
        assert sw.start > self.frames_.start

        # frames located before the start of the new sequence are aggregated
        # (over the buffer axis) and emitted
        delta_start = sw.start - self.frames_.start
        ready = self.frames_.samples(delta_start, mode='center')
        data = self.agg_func(self.buffer_[:, :ready], axis=0)
        output = SlidingWindowFeature(data, self.frames_)

        # drop emitted frames and move the time base to the new sequence
        self.buffer_ = self.buffer_[:, ready:]
        self.frames_ = SlidingWindow(start=sw.start,
                                     duration=sw.duration,
                                     step=sw.step)

        # remove empty (all NaN) buffers
        # (the last buffer is always kept, even when it is all NaN)
        n_buffers = self.buffer_.shape[0]
        for i in range(n_buffers):
            if np.any(~np.isnan(self.buffer_[i])):
                break
        self.buffer_ = self.buffer_[i:]

        # make room for one more buffer (first axis) and, if needed, for the
        # additional frames brought by the new sequence (second axis)
        n_samples = self.buffer_.shape[1]
        n_new_samples = sequence.data.shape[0]
        pad_width = ((0, 1), (0, max(0, n_new_samples - n_samples)))
        for _ in sequence.data.shape[1:]:
            pad_width += ((0, 0), )
        # `np.nan` is the canonical spelling: the `np.NAN` alias was removed
        # in NumPy 2.0
        self.buffer_ = np.pad(self.buffer_, pad_width, 'constant',
                              constant_values=np.nan)
        self.buffer_[-1] = sequence.data

        return output
Exemple #26
0
    def apply(self, current_file):
        """Compute embeddings on a sliding window

        When `self.internal` is falsy, one embedding per sub-sequence is
        returned. Otherwise, embeddings of overlapping sub-sequences are
        averaged frame by frame.

        Parameter
        ---------
        current_file : dict

        Returns
        -------
        embedding : SlidingWindowFeature
        """

        # compute embedding on sliding window
        # over the whole duration of the file
        fX = np.vstack(
            [batch for batch in self.from_file(current_file,
                                               incomplete=True)])

        subsequences = SlidingWindow(duration=self.duration, step=self.step)

        if not self.internal:
            return SlidingWindowFeature(fX, subsequences)

        # get total number of frames
        identifier = get_unique_identifier(current_file)
        n_frames = self.preprocessed_['X'][identifier].data.shape[0]

        # data[i] is the sum of all embeddings for frame #i
        data = np.zeros((n_frames, self.dimension), dtype=np.float32)

        # k[i] is the number of sequences that overlap with frame #i
        # (int32 rather than int8, which silently wraps around as soon as
        # more than 127 sequences overlap the same frame)
        k = np.zeros((n_frames, 1), dtype=np.int32)

        # frame and sub-sequence sliding windows
        frames = self.feature_extractor.sliding_window()

        for subsequence, fX_ in zip(subsequences, fX):

            # indices of frames overlapped by subsequence
            indices = frames.crop(subsequence,
                                  mode='center',
                                  fixed=self.duration)

            # accumulate their embedding
            data[indices] += fX_

            # keep track of the number of overlapping sequence
            k[indices] += 1

        # compute average embedding of each frame
        # (frames with no overlapping sequence are divided by 1; the counter
        # is cast to the dtype of `data` so that the result remains float32)
        data = data / np.maximum(k, 1).astype(data.dtype)

        return SlidingWindowFeature(data, frames)
    def apply(self, current_file, crop=None):
        """Extract embeddings

        Can process either pyannote.database protocol items (as dict) or
        batch of precomputed feature sequences (as numpy array).

        Parameters
        ----------
        current_file : dict or numpy array
            File (from pyannote.database protocol) or batch of precomputed
            feature sequences.
        crop : Segment or Timeline, optional
            When provided, only extract corresponding embeddings.

        Returns
        -------
        embedding : SlidingWindowFeature or numpy array
            Result of `self.postprocess_ndarray` when `current_file` is a
            numpy array; stacked embeddings (numpy array) when `crop` is
            provided; SlidingWindowFeature on a `self.duration` /
            `self.step` sliding window otherwise.
        """

        # if current_file is in fact a batch of feature sequences
        # use postprocess_ndarray directly.
        if isinstance(current_file, np.ndarray):
            return self.postprocess_ndarray(current_file)

        # HACK: change internal SlidingSegment's source to only extract
        # embeddings on provided "crop". keep track of original source
        # to set it back before the function returns
        source = self.generator.source
        if crop is not None:
            self.generator.source = crop

        try:
            # compute embedding on sliding window
            # over the whole duration of the source
            batches = list(self.from_file(current_file, incomplete=True))
        finally:
            # always restore the original source, even when extraction
            # fails, so that the generator is not left pointing at "crop"
            self.generator.source = source

        if not batches:
            # nothing was extracted: return an empty (0, dimension) array
            fX = np.zeros((0, self.dimension))
        else:
            fX = np.vstack(batches)

        if crop is not None:
            return fX

        subsequences = SlidingWindow(duration=self.duration, step=self.step)
        return SlidingWindowFeature(fX, subsequences)
Exemple #28
0
    def __init__(self, root_dir=None):
        """Load sliding window and dimension from the HDF5 configuration file

        Parameters
        ----------
        root_dir : optional
            Passed to `self.get_config_path` to locate the configuration
            file. Defaults to None.

        The file is expected to expose `start`, `duration`, `step` and
        `dimension` attributes.
        """
        super(Precomputed, self).__init__()
        self.root_dir = root_dir

        path = self.get_config_path(self.root_dir)

        # open read-only (attributes are only read here) and rely on the
        # context manager so the file is closed even if an attribute is
        # missing
        with h5py.File(path, mode='r') as f:
            start = f.attrs['start']
            duration = f.attrs['duration']
            step = f.attrs['step']
            dimension = f.attrs['dimension']

        self.sliding_window_ = SlidingWindow(start=start,
                                             duration=duration,
                                             step=step)
        self.dimension_ = dimension
Exemple #29
0
    def initialize(self, sequence):
        """Start a new buffer from the first sequence of a stream

        Parameters
        ----------
        sequence : object exposing `sliding_window` and `data`
            First sequence. Its sliding window becomes the common time base
            and its data becomes the only row of the buffer.

        Returns
        -------
        Stream.NoNewData
            Always: nothing can be output from a single sequence.
        """

        # the first sequence defines the common time base
        timebase = sequence.sliding_window
        self.frames_ = SlidingWindow(duration=timebase.duration,
                                     step=timebase.step,
                                     start=timebase.start)

        # buffer is a stack of sequences: add a leading axis of size one
        # and copy the first sequence into it
        first = sequence.data
        stack = np.empty((1,) + first.shape, dtype=first.dtype)
        stack[0] = first
        self.buffer_ = stack

        self.initialized_ = True

        return Stream.NoNewData
    def tst_iter(self):
        """Yield test files split into 30-second sessions

        For every file yielded by the parent `tst_iter`, each segment of its
        'annotated' entry is covered by a `SlidingWindow` (duration and step
        of 30 seconds, `end` set 3 seconds before the segment end). One copy
        of the file is yielded per window, with 'annotated' and 'annotation'
        cropped to that window.
        """
        for parent_file in super().tst_iter():
            uem = parent_file['annotated']
            reference = parent_file['annotation']

            for region in uem:
                windows = SlidingWindow(duration=30., step=30.,
                                        start=region.start,
                                        end=region.end - 3.)

                for window in windows:
                    # shallow copy of the parent file in which only the
                    # two cropped entries are overridden
                    yield dict(parent_file,
                               annotated=uem.crop(window),
                               annotation=reference.crop(window))
Exemple #31
0
    def initialize(self, sequence):
        """Set up the buffer from the first sequence of a stream

        Parameters
        ----------
        sequence : object exposing `sliding_window` and `data`
            First sequence. Its sliding window becomes the common (frame)
            time base and a copy of its data becomes the buffer.
        """

        timebase = sequence.sliding_window
        origin = timebase.start

        # frame-level time base, shared by every subsequent sequence
        self.frames_ = SlidingWindow(duration=timebase.duration,
                                     step=timebase.step,
                                     start=origin)

        # np.array copies, so the buffer does not alias the input data
        self.buffer_ = np.array(sequence.data)

        # output windows start where the first sequence starts
        self.window_ = SlidingWindow(duration=self.duration,
                                     step=self.step,
                                     start=origin)
        self.current_window_ = next(self.window_)

        # number of frames needed before one output window can be returned
        self.n_samples_ = self.frames_.samples(self.duration, mode='center')

        self.initialized_ = True
Exemple #32
0
def stream_audio(current_file, sample_rate=None, mono=True, duration=1.):
    """Simulate audio file streaming

    The whole file is read with `read_audio`, then served as consecutive
    chunks of (at most) `duration` seconds. Once the file is exhausted, the
    generator yields `Stream.EndOfStream` forever.

    Parameters
    ----------
    current_file : dict
        Dictionary given by pyannote.database.
    sample_rate : int, optional
        Target sampling rate. Defaults to using native sampling rate.
    mono : bool, optional
        Convert multi-channel to mono. Defaults to True.
    duration : float, optional
        Buffer duration, in seconds. Defaults to 1.

    Yields
    ------
    buffer : SlidingWindowFeature
        One chunk of samples with a trailing axis of size one added, on a
        sliding window whose duration and step both equal one sample period
        and whose start is the chunk start time.
    Stream.EndOfStream
        Indefinitely, after the last chunk.

    Usage
    -----
    >>> for buffer in stream_audio(current_file):
    ...     do_something_with(buffer)

    Notes
    -----
    Channel selection is delegated to `read_audio`; per the original note,
    a `channel` key in `current_file` selects this (1-indexed) channel.

    """

    waveform, sample_rate = read_audio(current_file,
                                       sample_rate=sample_rate,
                                       mono=mono)

    total = len(waveform)
    chunk_size = int(duration * sample_rate)

    # one sample period: used as both duration and step of the time base
    period = 1 / sample_rate

    for offset in range(0, total, chunk_size):
        stop = offset + chunk_size
        chunk = waveform[offset:stop, np.newaxis]
        timebase = SlidingWindow(start=offset / sample_rate,
                                 duration=period,
                                 step=period)
        yield SlidingWindowFeature(chunk, timebase)

    # the file is exhausted: signal it for as long as the consumer asks
    while True:
        yield Stream.EndOfStream
    def _sessionify(self, current_files):
        """Split files into 60-second sessions

        Parameters
        ----------
        current_files : iterable of dict
            Files providing 'annotated' and 'annotation' entries.

        Yields
        ------
        session_file : dict
            Shallow copy of the input file whose 'annotated' and
            'annotation' entries are cropped to one window of a
            `SlidingWindow` (duration and step of 60 seconds, `end` set
            60 seconds before the segment end) laid over each segment of
            the 'annotated' entry.
        """

        for parent_file in current_files:

            uem = parent_file['annotated']
            reference = parent_file['annotation']

            for region in uem:
                windows = SlidingWindow(duration=60.,
                                        step=60.,
                                        start=region.start,
                                        end=region.end - 60.)

                for window in windows:
                    # copy the parent file, overriding the cropped entries
                    yield dict(parent_file,
                               annotated=uem.crop(window),
                               annotation=reference.crop(window))
Exemple #34
0
class StreamBuffer(object):
    """This module concatenates (adjacent) input sequences and returns the
    result using a sliding window.

    Parameters
    ----------
    duration : float, optional
        Sliding window duration. Defaults to 3.2 seconds.
    step : float, optional
        Sliding window step. Defaults to `duration`.
    incomplete : bool, optional
        Set to True to return the current buffer on "end-of-stream"
        even if it is not complete. Defaults to False.
    """

    def __init__(self, duration=3.2, step=None, incomplete=False):
        super(StreamBuffer, self).__init__()
        self.duration = duration
        self.step = duration if step is None else step
        self.incomplete = incomplete
        self.initialized_ = False

    def initialize(self, sequence):
        """Set up buffer and time bases from the first sequence of a stream

        Parameters
        ----------
        sequence : object exposing `sliding_window` and `data`
            First sequence. Its sliding window becomes the common (frame)
            time base and a copy of its data becomes the buffer.
        """

        # common time base
        sw = sequence.sliding_window
        self.frames_ = SlidingWindow(start=sw.start,
                                     duration=sw.duration,
                                     step=sw.step)

        self.buffer_ = np.array(sequence.data)

        # output windows start where the first sequence starts
        self.window_ = SlidingWindow(start=sw.start,
                                     duration=self.duration,
                                     step=self.step)
        self.current_window_ = next(self.window_)

        # number of frames needed before one window can be returned
        self.n_samples_ = self.frames_.samples(self.duration, mode='center')
        self.initialized_ = True

    def __call__(self, sequence=Stream.NoNewData):
        """Feed one item of the input stream and get the next window, if any

        Parameters
        ----------
        sequence : optional
            Either a new sequence (exposing `sliding_window` and `data`),
            a `More` instance wrapping one, `Stream.EndOfStream` or
            `Stream.NoNewData` (default).

        Returns
        -------
        output
            `Stream.NoNewData` when the buffer does not hold enough samples
            yet. `Stream.EndOfStream` once the input stream has ended
            (preceded by the remaining buffer as a SlidingWindowFeature when
            `incomplete` is True). Otherwise the next window as a
            SlidingWindowFeature, wrapped into `More` when the buffer
            already holds enough samples for the following window.
        """

        if isinstance(sequence, More):
            sequence = sequence.output

        # stream signals are sentinels: compare them by identity so that
        # the test never goes through the `==` operator of a feature object

        # if input stream has ended
        if sequence is Stream.EndOfStream:

            # if buffer has been emptied already, return "end-of-stream"
            if not self.initialized_:
                return Stream.EndOfStream

            # reset buffer
            self.initialized_ = False

            # if requested, return the current buffer on "end-of-stream"
            if self.incomplete:
                return SlidingWindowFeature(self.buffer_, self.frames_)

            return Stream.EndOfStream

        # if input stream continues
        elif sequence is not Stream.NoNewData:

            # append to buffer
            if self.initialized_:

                # check that feature sequence uses the common time base
                sw = sequence.sliding_window
                assert sw.duration == self.frames_.duration
                assert sw.step == self.frames_.step

                # check that first frame is exactly the one that is expected
                expected = self.frames_[len(self.buffer_)]
                assert np.allclose(expected, sw[0])

                # append the new samples at the end of buffer
                self.buffer_ = np.concatenate([self.buffer_, sequence.data],
                                              axis=0)

            # initialize buffer
            else:
                self.initialize(sequence)

        # if not enough samples are available, there is nothing to return
        if not self.initialized_ or self.buffer_.shape[0] < self.n_samples_:
            return Stream.NoNewData

        # if enough samples are available, prepare output
        output = SlidingWindowFeature(self.buffer_[:self.n_samples_],
                                      self.frames_)

        # switch to next window
        self.current_window_ = next(self.window_)

        # update buffer by removing old samples and updating start time
        first_valid = self.frames_.crop(self.current_window_,
                                        mode='center',
                                        fixed=self.duration)[0]
        self.buffer_ = self.buffer_[first_valid:]
        self.frames_ = SlidingWindow(start=self.frames_[first_valid].start,
                                     duration=self.frames_.duration,
                                     step=self.frames_.step)

        # if enough samples are available for next window
        # wrap output into a More instance
        if self.buffer_.shape[0] >= self.n_samples_:
            output = More(output)

        return output
Exemple #35
0
    def __call__(self, sequence=Stream.NoNewData):
        """Feed one item of the input stream and get the next window, if any

        Parameters
        ----------
        sequence : optional
            Either a new sequence (exposing `sliding_window` and `data`),
            a `More` instance wrapping one, `Stream.EndOfStream` or
            `Stream.NoNewData` (default).

        Returns
        -------
        output
            `Stream.NoNewData` when the buffer does not hold enough samples
            yet. `Stream.EndOfStream` once the input stream has ended
            (preceded by the remaining buffer as a SlidingWindowFeature when
            `self.incomplete` is True). Otherwise the next window as a
            SlidingWindowFeature, wrapped into `More` when the buffer
            already holds enough samples for the following window.
        """

        if isinstance(sequence, More):
            sequence = sequence.output

        # stream signals are sentinels: compare them by identity so that
        # the test never goes through the `==` operator of a feature object

        # if input stream has ended
        if sequence is Stream.EndOfStream:

            # if buffer has been emptied already, return "end-of-stream"
            if not self.initialized_:
                return Stream.EndOfStream

            # reset buffer
            self.initialized_ = False

            # if requested, return the current buffer on "end-of-stream"
            if self.incomplete:
                return SlidingWindowFeature(self.buffer_, self.frames_)

            return Stream.EndOfStream

        # if input stream continues
        elif sequence is not Stream.NoNewData:

            # append to buffer
            if self.initialized_:

                # check that feature sequence uses the common time base
                sw = sequence.sliding_window
                assert sw.duration == self.frames_.duration
                assert sw.step == self.frames_.step

                # check that first frame is exactly the one that is expected
                expected = self.frames_[len(self.buffer_)]
                assert np.allclose(expected, sw[0])

                # append the new samples at the end of buffer
                self.buffer_ = np.concatenate([self.buffer_, sequence.data],
                                              axis=0)

            # initialize buffer
            else:
                self.initialize(sequence)

        # if not enough samples are available, there is nothing to return
        if not self.initialized_ or self.buffer_.shape[0] < self.n_samples_:
            return Stream.NoNewData

        # if enough samples are available, prepare output
        output = SlidingWindowFeature(self.buffer_[:self.n_samples_],
                                      self.frames_)

        # switch to next window
        self.current_window_ = next(self.window_)

        # update buffer by removing old samples and updating start time
        first_valid = self.frames_.crop(self.current_window_,
                                        mode='center',
                                        fixed=self.duration)[0]
        self.buffer_ = self.buffer_[first_valid:]
        self.frames_ = SlidingWindow(start=self.frames_[first_valid].start,
                                     duration=self.frames_.duration,
                                     step=self.frames_.step)

        # if enough samples are available for next window
        # wrap output into a More instance
        if self.buffer_.shape[0] >= self.n_samples_:
            output = More(output)

        return output
Exemple #36
0
class StreamAggregate(object):
    """This module accumulates (possibly overlapping) sequences
    and returns their aggregated version as soon as possible.

    Parameters
    ----------
    agg_func : callable, optional
        Aggregation function. Takes buffer of (possibly overlapping)
        sequences as input and returns their aggregation (must support the
        `axis=0` keyword argument). Defaults to np.nanmean.
    """

    def __init__(self, agg_func=np.nanmean):
        super(StreamAggregate, self).__init__()
        self.agg_func = agg_func
        self.initialized_ = False

    def initialize(self, sequence):
        """Start a new buffer from the first sequence of a stream

        Parameters
        ----------
        sequence : object exposing `sliding_window` and `data`
            First sequence. Its sliding window becomes the common time base
            and its data becomes the only row of the buffer.

        Returns
        -------
        Stream.NoNewData
            Always: nothing can be output from a single sequence.
        """

        # common time base
        sw = sequence.sliding_window
        self.frames_ = SlidingWindow(start=sw.start,
                                     duration=sw.duration,
                                     step=sw.step)

        # buffer is a stack of sequences (one per row along axis 0)
        data = sequence.data
        shape = (1,) + data.shape
        self.buffer_ = np.ones(shape, dtype=data.dtype)
        self.buffer_[0, :] = data

        self.initialized_ = True

        return Stream.NoNewData

    def __call__(self, sequence=Stream.NoNewData):
        """Feed one item of the input stream and get aggregated frames

        Parameters
        ----------
        sequence : optional
            Either a new sequence (exposing `sliding_window` and `data`),
            a `More` instance wrapping one, `Stream.EndOfStream` or
            `Stream.NoNewData` (default).

        Returns
        -------
        output
            `Stream.NoNewData` when there is no input or when the very
            first sequence is received. On `Stream.EndOfStream`, the
            aggregation of the whole remaining buffer (or
            `Stream.EndOfStream` if the buffer was already flushed).
            Otherwise a SlidingWindowFeature aggregating the frames located
            before the start of the new sequence.
        """

        if isinstance(sequence, More):
            sequence = sequence.output

        # no input ==> no output
        if sequence is Stream.NoNewData:
            return Stream.NoNewData

        if sequence is Stream.EndOfStream:
            if not self.initialized_:
                return Stream.EndOfStream

            # flush what is left in the buffer and reset
            self.initialized_ = False
            data = self.agg_func(self.buffer_, axis=0)
            return SlidingWindowFeature(data, self.frames_)

        if not self.initialized_:
            return self.initialize(sequence)

        # check that feature sequence uses the common time base
        sw = sequence.sliding_window
        assert sw.duration == self.frames_.duration
        assert sw.step == self.frames_.step
        assert sw.start > self.frames_.start

        # frames located before the start of the new sequence are aggregated
        # and returned now
        delta_start = sw.start - self.frames_.start
        ready = self.frames_.samples(delta_start, mode='center')
        data = self.agg_func(self.buffer_[:, :ready], axis=0)
        output = SlidingWindowFeature(data, self.frames_)

        # drop returned frames; buffer now starts where new sequence starts
        self.buffer_ = self.buffer_[:, ready:]
        self.frames_ = SlidingWindow(start=sw.start,
                                     duration=sw.duration,
                                     step=sw.step)

        # remove empty (all NaN) buffers
        # (the last row is kept even when every row is empty)
        n_buffers = self.buffer_.shape[0]
        for i in range(n_buffers):
            if np.any(~np.isnan(self.buffer_[i])):
                break
        self.buffer_ = self.buffer_[i:]

        # add one NaN-filled row for the new sequence and, if it is longer
        # than the buffer, extend every row with NaN to make it fit.
        # trailing feature axes (if any) are left untouched.
        n_samples = self.buffer_.shape[1]
        n_new_samples = sequence.data.shape[0]
        pad_width = ((0, 1), (0, max(0, n_new_samples - n_samples)))
        pad_width += ((0, 0), ) * (sequence.data.ndim - 1)
        self.buffer_ = np.pad(self.buffer_, pad_width, 'constant',
                              constant_values=np.nan)

        # only fill the slots actually covered by the new sequence: when it
        # is shorter than the buffer, remaining slots of its row stay NaN
        self.buffer_[-1, :n_new_samples] = sequence.data

        return output