def get_resolution(
    task: Task,
    sample_rate: int = 16000,
    out_channels: List[int] = [512, 512, 512, 512, 512, 512],
    kernel_size: List[int] = [251, 5, 5, 5, 5, 5],
    stride: List[int] = [5, 1, 1, 1, 1, 1],
    max_pool: List[int] = [3, 3, 3, 3, 3, 3],
    **kwargs,
) -> SlidingWindow:
    """Output frame resolution

    Accumulates the receptive field and the hop ("jump") of a stack of
    (convolution, max-pooling) layers, both expressed in input samples,
    and converts them to seconds using `sample_rate`.

    Parameters
    ----------
    task : Task
        Not used by the computation.
    sample_rate : int, optional
        Number of input samples per second. Defaults to 16000.
    out_channels : list of int, optional
        Not used by the computation.
    kernel_size : list of int, optional
        Kernel size of each convolution layer.
    stride : list of int, optional
        Stride of each convolution layer.
    max_pool : list of int, optional
        Kernel size (also used as stride) of each max-pooling layer.
    **kwargs
        Ignored.

    Returns
    -------
    resolution : SlidingWindow
        Window whose duration is the receptive field and whose step is the
        cumulated hop, both in seconds. Its start is always 0.0.

    Notes
    -----
    The list defaults are shared between calls; they are only read here.
    `kernel_size`, `stride` and `max_pool` are consumed in lockstep with
    `zip`, so extra entries in the longer lists are ignored.
    """
    # https://medium.com/mlreview/a-guide-to-receptive-field-arithmetic-for-convolutional-neural-networks-e0f514068807
    receptive_field, jump = 1, 1
    for ks, s, mp in zip(kernel_size, stride, max_pool):
        # increase due to (Sinc)Conv1d
        receptive_field += (ks - 1) * jump
        jump *= s
        # increase in receptive field due to MaxPool1d
        receptive_field += (mp - 1) * jump
        jump *= mp

    return SlidingWindow(
        duration=receptive_field / sample_rate,
        step=jump / sample_rate,
        start=0.0,
    )
def __call__(self, item):
    """Read one channel of the audio file referenced by `item`

    Parameters
    ----------
    item : dict
        Must provide the audio path under the 'wav' key. The optional
        'channel' key selects the (1-indexed) channel; defaults to 1.

    Returns
    -------
    waveform : SlidingWindowFeature
        Samples of the selected channel, on a sliding window whose
        duration and step are both one sample period, starting at 0.

    Raises
    ------
    PyannoteFeatureExtractionError
        If reading the file raises IOError, or if the samples contain NaNs.
    """
    wav = item['wav']
    try:
        y, sample_rate, _ = pysndfile.sndio.read(wav)
    except IOError as e:
        # exceptions have no `.message` attribute on Python 3
        raise PyannoteFeatureExtractionError(str(e))

    if np.any(np.isnan(y)):
        uri = get_unique_identifier(item)
        msg = 'pysndfile output contains NaNs for file "{uri}".'
        raise PyannoteFeatureExtractionError(msg.format(uri=uri))

    # reshape before selecting channel
    if len(y.shape) < 2:
        y = y.reshape(-1, 1)

    channel = item.get('channel', 1)
    y = y[:, channel - 1]

    sliding_window = SlidingWindow(start=0.,
                                   duration=1. / sample_rate,
                                   step=1. / sample_rate)
    return SlidingWindowFeature(y, sliding_window)
def apply(self, current_file):
    """Computes BIC distance between sliding windows

    Parameter
    ---------
    current_file : dict
        Passed as is to `self.from_file`.

    Returns
    -------
    predictions : SlidingWindowFeature
        One value per (left, right) pair, on a sliding window of duration
        `2 * self.duration` and step `self.step`, starting at 0.
    """
    from pyannote.algorithms.stats.gaussian import Gaussian

    def _bic(x_left, x_right):
        # one Gaussian per side, then BIC between the two (no penalty)
        g_left = Gaussian(covariance_type=self.covariance_type).fit(x_left)
        g_right = Gaussian(covariance_type=self.covariance_type).fit(x_right)
        return g_left.bic(g_right, penalty_coef=0)[0]

    # only the first item produced by `from_file` is used
    _, left, right = next(self.from_file(current_file))

    distances = np.array([_bic(x_left, x_right)
                          for x_left, x_right in zip(left, right)])

    window = SlidingWindow(duration=2 * self.duration,
                           step=self.step,
                           start=0.)
    return SlidingWindowFeature(distances, window)
def __init__(self, root_dir=None, duration=0.025, step=None):
    """Set up the extractor from any HTK file found below `root_dir`

    Parameters
    ----------
    root_dir : str, optional
        Directory searched with the '{root_dir}/*/*.htk' pattern.
    duration : float, optional
        Duration of the sliding window. Defaults to 0.025.
    step : float, optional
        When provided, overrides the step read from the HTK header.

    Raises
    ------
    ValueError
        If no HTK file matches the pattern.
    """
    super(PrecomputedHTK, self).__init__()
    self.root_dir = root_dir
    self.duration = duration

    # load any htk file in root_dir/database
    # FIXME switch to Py3.5 and use glob 'recursive' parameter
    # http://stackoverflow.com/questions/2186525/
    # use-a-glob-to-find-files-recursively-in-python
    pattern = '{root_dir}/*/*.htk'.format(root_dir=root_dir)
    candidates = glob(pattern)
    if not candidates:
        msg = "Could not find any HTK file in '{root_dir}'."
        raise ValueError(msg.format(root_dir=root_dir))

    X, sample_period = self.load_htk(candidates[0])
    self.dimension_ = X.shape[1]

    # step derived from the HTK header (sample period scaled by 1e-7)
    header_step = sample_period * 1e-7

    # don't trust HTK header when 'step' is provided by the user.
    # HACK remove this when Pepe's HTK files are fixed...
    self.step = header_step if step is None else step

    self.sliding_window_ = SlidingWindow(start=0.,
                                         duration=self.duration,
                                         step=self.step)
def apply(self, wav):
    """Computes distance between sliding windows embeddings

    Parameter
    ---------
    wav : str
        Path to wav audio file

    Returns
    -------
    predictions : SlidingWindowFeature
        One divergence value per (left, right) pair, on a sliding window
        of duration `2 * self.duration` and step `self.step`, starting
        at 0.
    """
    from pyannote.algorithms.stats.gaussian import Gaussian

    def _divergence(x_left, x_right):
        # diagonal-covariance Gaussian on each side, then their divergence
        g_left = Gaussian(covariance_type='diag').fit(x_left)
        g_right = Gaussian(covariance_type='diag').fit(x_right)
        return g_left.divergence(g_right)

    current_file = {'uri': wav, 'medium': {'wav': wav}}

    # only the first item produced by `from_file` is used
    _, left, right = next(self.from_file(current_file))

    distances = np.array([_divergence(x_left, x_right)
                          for x_left, x_right in zip(left, right)])

    window = SlidingWindow(duration=2 * self.duration,
                           step=self.step,
                           start=0.)
    return SlidingWindowFeature(distances, window)
def initialize(self, sequence):
    """Initialize internal state from the first incoming sequence

    Parameters
    ----------
    sequence : SlidingWindowFeature
        Its sliding window defines the common time base and its data
        seeds the buffer (assumed to be a SlidingWindowFeature since
        only `.sliding_window` and `.data` are read here).
    """
    time_base = sequence.sliding_window
    origin = time_base.start

    # common time base (copy of the incoming sliding window)
    self.frames_ = SlidingWindow(start=origin,
                                 duration=time_base.duration,
                                 step=time_base.step)

    # buffer starts as a copy of the incoming data
    self.buffer_ = np.array(sequence.data)

    # aggregation windows share the same origin as the frames
    self.window_ = SlidingWindow(start=origin,
                                 duration=self.duration,
                                 step=self.step)
    self.current_window_ = next(self.window_)

    # number of frames spanned by one aggregation window
    self.n_samples_ = self.frames_.samples(self.duration, mode='center')

    self.initialized_ = True
def iter_segments(self, from_annotation):
    """Yield labeled (sub-)segments from the tracks of an annotation

    Parameters
    ----------
    from_annotation : Annotation
        Iterated with `itertracks(label=True)`.

    Yields
    ------
    (segment, label) : tuple
        Tracks shorter than `self.min_duration` are skipped. Tracks
        shorter than `self.duration` are yielded whole, only when
        `self.variable_length_` is set. Longer tracks are covered by
        sliding windows of duration `self.duration` and step `self.step`.
    """
    for track, _, label in from_annotation.itertracks(label=True):
        length = track.duration

        # skip segments that are too short
        if length < self.min_duration:
            continue

        # yield segments shorter than duration
        # when variable length segments are allowed
        if length < self.duration:
            if self.variable_length_:
                yield (track, label)
            continue

        # yield sliding segments within current track
        windows = SlidingWindow(duration=self.duration,
                                step=self.step,
                                start=track.start,
                                end=track.end)
        for chunk in windows:

            # current window is fully contained by the track
            if chunk in track:
                yield (chunk, label)
                continue

            # it is not, and variable length segments are not allowed
            if not self.variable_length_:
                continue

            # keep the overlapping part if it is long enough, then stop
            remainder = chunk & track
            if remainder.duration >= self.min_duration:
                yield (remainder, label)
            break
def chunks(duration: float, chunk: float = 30, shuffle: bool = False) -> Iterator[Segment]:
    """Partition [0, duration] time range into smaller chunks

    Parameters
    ----------
    duration : float
        Total duration, in seconds.
    chunk : float, optional
        Chunk duration, in seconds. Defaults to 30.
    shuffle : bool, optional
        Yield chunks in random order. Defaults to chronological order.

    Yields
    ------
    focus : Segment
        Consecutive windows of duration `chunk`, followed by one shorter
        segment covering whatever remains up to `duration`. When no full
        window is produced (e.g. `duration` shorter than `chunk`), the
        whole [0, duration] range is yielded as a single segment.
    """
    # if requested, yield the chronological chunks in random order
    if shuffle:
        shuffled = list(chunks(duration, chunk=chunk, shuffle=False))
        random.shuffle(shuffled)
        yield from shuffled
        return

    sliding_window = SlidingWindow(start=0.0, step=chunk, duration=chunk)
    whole = Segment(0, duration)

    # end of the last full window yielded so far; it stays at 0 when the
    # sliding window yields nothing, so the remainder check below never
    # reads an unbound loop variable
    covered = 0.0
    for window in sliding_window(whole):
        yield window
        covered = window.end

    # trailing part not covered by a full window
    if covered < duration:
        yield Segment(covered, duration)
def speaker_spotting_try(current_trial):
    """Score one speaker spotting trial, frame by frame

    Parameters
    ----------
    current_trial : dict
        Reads the 'model_id', 'try_with' and 'uri' keys. Also passed as
        is to the module-level `precomputed` callable.

    Returns
    -------
    scores : SlidingWindowFeature
        `2 - cosine distance` to the target model for embeddings that are
        both within 'try_with' and within the reference speech regions,
        zero elsewhere. Restricted to the [first, last] embedding range.
    """
    # target model
    target = models[current_trial['model_id']]

    # where to look for this target
    try_with = current_trial['try_with']

    # precomputed embedding
    embeddings = precomputed(current_trial)

    # find index of first and last embedding fully included in 'try_with'
    in_trial = embeddings.sliding_window.crop(try_with, mode='strict')
    first = in_trial[0]
    last = in_trial[-1]

    # embeddings fully included in reference speech regions
    reference = REFERENCE[current_trial['uri']]
    speech_timeline = reference.crop(
        current_trial['try_with']).get_timeline().support()
    indices_speech = embeddings.sliding_window.crop(speech_timeline,
                                                    mode='strict')

    # compare all embeddings to target model
    scores = 2. - cdist(embeddings.data, target, metric='cosine')

    data = np.zeros((len(embeddings.data), 1))
    for i, (_window, _features) in enumerate(embeddings):
        # make sure the current segment is in 'try_with' and is speech
        keep = (i >= first) and (i in indices_speech)
        if not keep:
            continue
        if i > last:
            break
        data[i] = scores[i]

    data = data[first:last + 1]

    frames = embeddings.sliding_window
    sliding_window = SlidingWindow(
        start=embeddings.sliding_window[first].start,
        duration=frames.duration,
        step=frames.step)

    return SlidingWindowFeature(data, sliding_window)
def speaker_spotting_try(current_trial):
    """Score one speaker spotting trial using SAD speech regions

    Parameters
    ----------
    current_trial : dict
        Reads the 'model_id', 'try_with' and 'uri' keys. Also passed as
        is to the module-level `precomputed` callable.

    Returns
    -------
    scores : SlidingWindowFeature
        `2 - mean cosine distance` to the target model for embeddings
        whose index is returned by cropping with the `SAD` speech
        timeline, zero elsewhere. Restricted to the [first, last]
        embedding range found for 'try_with'.
    """
    # target model
    target = models[current_trial['model_id']]

    # where to look for this target
    try_with = current_trial['try_with']

    # precomputed embedding
    embeddings = precomputed(current_trial)

    # find index of first and last embedding fully included in 'try_with'
    in_trial = embeddings.sliding_window.crop(try_with, mode='strict')
    first = in_trial[0]
    last = in_trial[-1]

    # embeddings whose center falls within detected speech
    speech_timeline = SAD[current_trial['uri']]
    candidates = embeddings.sliding_window.crop(speech_timeline,
                                                mode='center')

    # compare all embeddings to target model
    distances = cdist(embeddings.data, target, metric='cosine')
    data = 2. - np.mean(distances, axis=1, keepdims=True)

    # two extra rows beyond the number of embeddings, as in the
    # zero-initialized buffer this function has always used
    score = np.zeros((len(embeddings.data) + 2, 1))

    # drop speech indices that fall beyond the available scores
    n_scores = len(data)
    speech = [index for index in candidates if index < n_scores]
    score[speech] = data[speech]

    score = score[first:last + 1]

    frames = embeddings.sliding_window
    sliding_window = SlidingWindow(
        start=embeddings.sliding_window[first].start,
        duration=frames.duration,
        step=frames.step)

    return SlidingWindowFeature(score, sliding_window)
def iter_segments(self, source):
    """Yield running segments within `source`

    Parameters
    ----------
    source : float, Segment, Timeline or Annotation
        If `float`, yield running segments within [0, source).
        If `Segment`, yield running segments within this segment.
        If `Timeline`, yield running segments within this timeline.
        If `Annotation`, yield running segments within its timeline.

    Yields
    ------
    segment : Segment
        Segments shorter than `self.min_duration` are skipped. Segments
        shorter than `self.duration` are yielded whole, only when
        `self.variable_length_` is set. Longer ones are covered by
        sliding windows of duration `self.duration` and step `self.step`.

    Raises
    ------
    ValueError
        If `source` is a number and `self.duration` is not strictly
        positive.
    TypeError
        If `source` has an unsupported type.
    """
    if isinstance(source, Annotation):
        regions = source.get_timeline()
    elif isinstance(source, Timeline):
        regions = source
    elif isinstance(source, Segment):
        regions = [source]
    elif isinstance(source, (int, float)):
        # NOTE(review): this checks `self.duration`, not `source` —
        # confirm that is the intended check.
        if not self.duration > 0:
            raise ValueError('Duration must be strictly positive.')
        regions = [Segment(0, source)]
    else:
        raise TypeError(
            'source must be float, Segment, Timeline or Annotation')

    for region in regions:
        length = region.duration

        # skip segments that are too short
        if length < self.min_duration:
            continue

        # yield segments shorter than duration
        # when variable length segments are allowed
        if length < self.duration:
            if self.variable_length_:
                yield region
            continue

        # yield sliding segments within current track
        windows = SlidingWindow(duration=self.duration,
                                step=self.step,
                                start=region.start,
                                end=region.end)
        for chunk in windows:

            # current window is fully contained by the region
            if chunk in region:
                yield chunk
                continue

            # it is not, and variable length segments are not allowed
            if not self.variable_length_:
                continue

            # yield one last window aligned with the region end, then stop
            yield Segment(start=region.end - self.duration,
                          end=region.end)
            break
def apply(self, wav):
    """Apply sequence labeling and average overlapping predictions

    Parameter
    ---------
    wav : str
        Path to wav audio file

    Returns
    -------
    predictions : SlidingWindowFeature
        Frame-level predictions (one row per frame of the feature
        extractor sliding window), averaged over all the sequences that
        overlap each frame.
    """
    # apply sequence labeling to the whole file
    current_file = {'uri': wav, 'medium': {'wav': wav}}
    predictions = next(self.from_file(current_file))
    n_sequences, _, n_classes = predictions.shape

    # estimate total number of frames (over the duration of the whole file)
    # based on feature extractor internal sliding window and file duration
    samples_window = self.feature_extractor.sliding_window()
    n_samples = samples_window.samples(get_wav_duration(wav)) + 3
    # +3 is a hack to avoid later IndexError resulting from rounding error
    # when cropping samples_window

    # k[i] contains the number of sequences that overlap with frame #i.
    # float32 rather than int8: an int8 counter wraps around as soon as
    # more than 127 sequences overlap a frame, and float32 keeps the
    # division below in float32.
    k = np.zeros((n_samples, ), dtype=np.float32)

    # y[i] contains the sum of predictions for frame #i
    # over all overlapping samples
    y = np.zeros((n_samples, n_classes), dtype=np.float32)

    # sequence sliding window
    sequence_window = SlidingWindow(duration=self.duration, step=self.step)

    # accumulate predictions over all sequences
    for i in range(n_sequences):

        # position of sequence #i
        window = sequence_window[i]

        # indices of frames overlapped by sequence #i
        indices = samples_window.crop(window, mode='center',
                                      fixed=self.duration)

        # accumulate predictions
        # TODO - use smarter weights (e.g. Hamming window)
        k[indices] += 1
        y[indices] += predictions[i, :, :]

    # average prediction (frames never covered are divided by 1)
    y = (y.T / np.maximum(k, 1)).T

    # returns the whole thing as SlidingWindowFeature
    return SlidingWindowFeature(y, samples_window)
def speaker_spotting_try_system4(current_trial):
    """Score one speaker spotting trial with online clustering

    Parameters
    ----------
    current_trial : dict
        Reads the 'model_id', 'try_with' and 'uri' keys. Also passed as
        is to the module-level `precomputed` callable.

    Returns
    -------
    scores : SlidingWindowFeature
        For each embedding that is both within 'try_with' and within the
        reference speech regions: `max(0, 2 - d)` where `d` is the
        smallest distance reported by the online clustering for the
        target model. Zero elsewhere. Restricted to the [first, last]
        embedding range.
    """
    # target model
    model_id = current_trial['model_id']
    model_embedding = models[current_trial['model_id']]
    model = {'mid': model_id, 'embedding': model_embedding}

    # where to look for this target
    try_with = current_trial['try_with']

    # precomputed embedding
    embeddings = precomputed(current_trial)

    # find index of first and last embedding fully included in 'try_with'
    in_trial = embeddings.sliding_window.crop(try_with, mode='strict')

    # embeddings fully included in reference speech regions
    reference = REFERENCE[current_trial['uri']]
    speech_timeline = reference.crop(
        current_trial['try_with']).get_timeline().support()
    indices_speech = embeddings.sliding_window.crop(speech_timeline,
                                                    mode='strict')

    first = in_trial[0]
    last = in_trial[-1]

    pairwise = cdist(embeddings.data, embeddings.data, metric='cosine')
    onlineClustering = clustering.OnlineClustering(current_trial['uri'],
                                                   pairwise)

    start = embeddings.sliding_window[0].start
    data = np.zeros((len(embeddings.data), 1))

    for i, (window, _) in enumerate(embeddings):

        # outside 'try_with' or not speech: just move the cursor forward
        if i < first or (i not in indices_speech):
            start = window.end
            continue
        if i > last:
            break

        # audio span since the previous cursor position
        so_far = Segment(start, window.end)
        span_embedding = embeddings.crop(so_far, mode='center')
        to_model = cdist(span_embedding, model['embedding'],
                         metric='cosine')
        example = {
            'segment': so_far,
            'embedding': span_embedding,
            'indice': [i],
            'distances': {model['mid']: list(to_model.flatten())},
        }
        onlineClustering.upadateCluster2(example)

        score = 0.
        if not onlineClustering.empty():
            #min_dist = min(onlineClustering.computeDistances({'embedding': model}))
            min_dist = min(onlineClustering.modelClusterDistance(model))
            score = max(score, 2 - min_dist)
        data[i] = score

        start = window.end

    data = data[first:last + 1]

    frames = embeddings.sliding_window
    sliding_window = SlidingWindow(
        start=embeddings.sliding_window[first].start,
        duration=frames.duration,
        step=frames.step)

    return SlidingWindowFeature(data, sliding_window)
def __call__(self, current_file, return_sr=False):
    """Obtain waveform

    Parameters
    ----------
    current_file : dict
        `pyannote.database` files. Either provides a precomputed
        (n_samples, n_channels) array under the "waveform" key, or a path
        under the "audio" key. The optional "channel" key selects one
        (1-indexed) channel.
    return_sr : `bool`, optional
        Return sample rate. Defaults to False

    Returns
    -------
    waveform : `pyannote.core.SlidingWindowFeature`
        Waveform
    sample_rate : `int`
        Only when `return_sr` is set to True

    Raises
    ------
    ValueError
        If a precomputed waveform is provided while `self.sample_rate` is
        None, or if that waveform is not 2-dimensional.
    """
    if "waveform" in current_file:

        if self.sample_rate is None:
            msg = ("`RawAudio` needs to be instantiated with an actual "
                   "`sample_rate` if one wants to use precomputed "
                   "waveform.")
            raise ValueError(msg)
        sample_rate = self.sample_rate

        y = current_file["waveform"]
        if len(y.shape) != 2:
            msg = ("Precomputed waveform should be provided as a "
                   "(n_samples, n_channels) `np.ndarray`.")
            raise ValueError(msg)

    else:
        y, sample_rate = sf.read(current_file["audio"],
                                 dtype="float32",
                                 always_2d=True)

    # extract specific channel if requested
    channel = current_file.get("channel", None)
    if channel is not None:
        y = y[:, channel - 1:channel]

    y = self.get_features(y, sample_rate)

    # Rate of the returned samples: `self.sample_rate` when it is set,
    # the rate of the file otherwise. The sliding window uses the very
    # same rate that is reported to the caller, so both always agree.
    # NOTE(review): assumes `get_features` outputs samples at
    # `self.sample_rate` when it is set — confirm against `get_features`.
    output_rate = sample_rate if self.sample_rate is None \
        else self.sample_rate

    sliding_window = SlidingWindow(start=-0.5 / output_rate,
                                   duration=1.0 / output_rate,
                                   step=1.0 / output_rate)
    waveform = SlidingWindowFeature(y, sliding_window)

    if return_sr:
        return waveform, output_rate

    return waveform
def apply(self, current_file):
    """Compute predictions on a sliding window

    Parameter
    ---------
    current_file : dict

    Returns
    -------
    predictions : SlidingWindowFeature
        Frame-level predictions, averaged over all the sub-sequences
        that overlap each frame. Empty (0 rows) when `from_file` yields
        no batch.
    """
    # frame and sub-sequence sliding windows
    frames = self.feature_extraction.sliding_window()

    batches = list(self.from_file(current_file, incomplete=True))
    if not batches:
        data = np.zeros((0, self.dimension), dtype=np.float32)
        return SlidingWindowFeature(data, frames)

    fX = np.vstack(batches)

    subsequences = SlidingWindow(duration=self.duration, step=self.step)

    # get total number of frames
    if isinstance(self.feature_extraction, Precomputed):
        n_frames, _ = self.feature_extraction.shape(current_file)
    else:
        uri = get_unique_identifier(current_file)
        # unpack the *shape* of the preprocessed features; unpacking the
        # array itself fails unless it happens to have exactly two rows
        n_frames, _ = self.preprocessed_[uri].data.shape

    # data[i] is the sum of all predictions for frame #i
    data = np.zeros((n_frames, self.dimension), dtype=np.float32)

    # k[i] is the number of sequences that overlap with frame #i.
    # float32 rather than int8: an int8 counter wraps around as soon as
    # more than 127 sub-sequences overlap a frame, and float32 keeps the
    # division below in float32.
    k = np.zeros((n_frames, 1), dtype=np.float32)

    for subsequence, fX_ in zip(subsequences, fX):

        # indices of frames overlapped by subsequence
        indices = frames.crop(subsequence,
                              mode='center',
                              fixed=self.duration)

        # accumulate the outputs
        data[indices] += fX_

        # keep track of the number of overlapping sequence
        # TODO - use smarter weights (e.g. Hamming window)
        k[indices] += 1

    # compute average embedding of each frame
    # (frames never covered are divided by 1)
    data = data / np.maximum(k, 1)

    return SlidingWindowFeature(data, frames)
def __init__(self, root_dir=None):
    """Store `root_dir` and set up a fixed sliding window

    Parameters
    ----------
    root_dir : optional
        Stored as is on the instance.
    """
    super(Precomputed, self).__init__()
    self.root_dir = root_dir

    # hard-coded sliding window: starts at 0, lasts 2.5, moves by 2
    self.sliding_window_ = SlidingWindow(start=0, duration=2.5, step=2)
def __init__(self, root_dir=None, use_memmap=True, sliding_window=None,
             dimension=None):
    """Load (or create) the 'metadata.yml' file stored in `root_dir`

    Parameters
    ----------
    root_dir : str or Path
        Directory containing (or that will contain) 'metadata.yml'.
    use_memmap : bool, optional
        Stored as is on the instance. Defaults to True.
    sliding_window : SlidingWindow, optional
        When 'metadata.yml' exists, checked against the stored sliding
        window. Otherwise, written to a new 'metadata.yml'.
    dimension : int, optional
        When 'metadata.yml' exists, checked against the stored dimension.
        Otherwise, written to a new 'metadata.yml'.

    Raises
    ------
    ValueError
        If 'metadata.yml' exists and disagrees with the provided
        `dimension` or `sliding_window`, or if it does not exist and
        either `sliding_window` or `dimension` is missing.
    """
    super(Precomputed, self).__init__()
    self.root_dir = Path(root_dir).expanduser().resolve(strict=False)
    self.use_memmap = use_memmap

    path = self.root_dir / 'metadata.yml'
    if path.exists():

        with io.open(path, 'r') as f:
            # NOTE(review): `yaml.load` without an explicit Loader can
            # build arbitrary Python objects; kept as is because existing
            # metadata files may rely on it. Consider a safe loader if
            # `root_dir` may hold untrusted content.
            params = yaml.load(f)

        # whatever remains after removing 'dimension' describes the
        # sliding window
        self.dimension_ = params.pop('dimension')
        self.sliding_window_ = SlidingWindow(**params)

        if dimension is not None and self.dimension_ != dimension:
            msg = 'inconsistent "dimension" (is: {0}, should be: {1})'
            # report the stored `self.dimension_` (the former
            # `self.dimensions_` attribute does not exist)
            raise ValueError(msg.format(dimension, self.dimension_))

        if ((sliding_window is not None) and
            ((sliding_window.start != self.sliding_window_.start) or
             (sliding_window.duration != self.sliding_window_.duration) or
             (sliding_window.step != self.sliding_window_.step))):
            msg = 'inconsistent "sliding_window"'
            raise ValueError(msg)

    else:

        if sliding_window is None or dimension is None:
            msg = (
                f'Either directory {self.root_dir} does not exist or it '
                f'does not contain precomputed features. In case it exists '
                f'and this was done on purpose, please provide both '
                f'`sliding_window` and `dimension` parameters when '
                f'instantianting `Precomputed`.')
            raise ValueError(msg)

        # create parent directory
        mkdir_p(path.parent)

        params = {
            'start': sliding_window.start,
            'duration': sliding_window.duration,
            'step': sliding_window.step,
            'dimension': dimension
        }

        with io.open(path, 'w') as f:
            yaml.dump(params, f, default_flow_style=False)

        self.sliding_window_ = sliding_window
        self.dimension_ = dimension
def initialize(self, sequence):
    """Initialize internal state from the first incoming sequence

    Parameters
    ----------
    sequence : SlidingWindowFeature
        Its sliding window defines the common time base and its data
        seeds the buffer (assumed to be a SlidingWindowFeature since
        only `.sliding_window` and `.data` are read here).
    """
    time_base = sequence.sliding_window

    # common time base (copy of the incoming sliding window)
    self.frames_ = SlidingWindow(start=time_base.start,
                                 duration=time_base.duration,
                                 step=time_base.step)

    # buffer starts as a copy of the incoming data
    self.buffer_ = np.array(sequence.data)

    self.initialized_ = True
def __call__(self, sequence=Stream.NoNewData):
    """Aggregate overlapping feature sequences as they arrive

    Parameters
    ----------
    sequence : SlidingWindowFeature, More, Stream.NoNewData or
               Stream.EndOfStream
        New (possibly overlapping) feature sequence. `More` instances are
        replaced by their `output` attribute.

    Returns
    -------
    output : SlidingWindowFeature, Stream.NoNewData or Stream.EndOfStream
        `Stream.NoNewData` when there is no input. On end of stream,
        the aggregation of everything still buffered (or
        `Stream.EndOfStream` if nothing was ever buffered). On the first
        sequence, whatever `self.initialize` returns. Otherwise, the
        aggregation (with `self.agg_func`) of the buffered frames that
        precede the start of the new sequence.
    """
    if isinstance(sequence, More):
        sequence = sequence.output

    # no input ==> no output
    if sequence is Stream.NoNewData:
        return Stream.NoNewData

    if sequence is Stream.EndOfStream:
        if not self.initialized_:
            return Stream.EndOfStream

        # flush whatever is left in the buffer
        self.initialized_ = False
        data = self.agg_func(self.buffer_, axis=0)
        return SlidingWindowFeature(data, self.frames_)

    if not self.initialized_:
        return self.initialize(sequence)

    # check that feature sequence uses the common time base
    sw = sequence.sliding_window
    assert sw.duration == self.frames_.duration
    assert sw.step == self.frames_.step
    assert sw.start > self.frames_.start

    # frames that precede the new sequence will never be updated again:
    # aggregate and output them
    delta_start = sw.start - self.frames_.start
    ready = self.frames_.samples(delta_start, mode='center')
    data = self.agg_func(self.buffer_[:, :ready], axis=0)
    output = SlidingWindowFeature(data, self.frames_)

    # the buffer now starts where the new sequence starts
    self.buffer_ = self.buffer_[:, ready:]
    self.frames_ = SlidingWindow(start=sw.start,
                                 duration=sw.duration,
                                 step=sw.step)

    # remove empty (all NaN) buffers: drop leading rows until the first
    # one that still holds a value (the last row is always kept).
    # `first_kept` is pre-set so that an empty buffer does not leave it
    # unbound.
    n_buffers = self.buffer_.shape[0]
    first_kept = 0
    for first_kept in range(n_buffers):
        if np.any(~np.isnan(self.buffer_[first_kept])):
            break
    self.buffer_ = self.buffer_[first_kept:]

    # add one (NaN-filled) row for the new sequence and, if needed,
    # extend the time axis so that it fits
    n_samples = self.buffer_.shape[1]
    n_new_samples = sequence.data.shape[0]
    pad_width = ((0, 1), (0, max(0, n_new_samples - n_samples)))
    for _ in sequence.data.shape[1:]:
        pad_width += ((0, 0), )
    # `np.nan` rather than the `np.NAN` alias, removed in NumPy 2.0
    self.buffer_ = np.pad(self.buffer_, pad_width, 'constant',
                          constant_values=np.nan)
    self.buffer_[-1] = sequence.data

    return output
def __call__(self, current_file):
    """Read audio and wrap it as a sample-level SlidingWindowFeature

    Parameters
    ----------
    current_file : dict
        Passed as is to `read_audio`, together with `self.sample_rate`
        and `self.mono`.

    Returns
    -------
    waveform : SlidingWindowFeature
        Samples on a sliding window whose duration and step are both one
        sample period, starting at 0.
    """
    y, sample_rate = read_audio(current_file,
                                sample_rate=self.sample_rate,
                                mono=self.mono)

    # one frame per sample
    period = 1. / sample_rate
    frames = SlidingWindow(start=0., duration=period, step=period)

    return SlidingWindowFeature(y, frames)
def __init__(self, sample_rate=None, mono=True, augmentation=None):
    """Store parameters and, when possible, the sample-level window

    Parameters
    ----------
    sample_rate : int, optional
        When provided, `self.sliding_window_` is set to a window whose
        duration and step are one sample period and that starts half a
        period before 0. Left unset otherwise.
    mono : bool, optional
        Stored as is on the instance. Defaults to True.
    augmentation : optional
        Stored as is on the instance.
    """
    super().__init__()
    self.sample_rate = sample_rate
    self.mono = mono
    self.augmentation = augmentation

    # without a sample rate, the sliding window cannot be known yet
    if sample_rate is None:
        return

    period = 1. / sample_rate
    self.sliding_window_ = SlidingWindow(start=-.5 / sample_rate,
                                         duration=period,
                                         step=period)
def apply(self, current_file):
    """Compute embeddings on a sliding window

    Parameter
    ---------
    current_file : dict

    Returns
    -------
    embedding : SlidingWindowFeature
        When `self.internal` is falsy, one embedding per sub-sequence.
        Otherwise, frame-level embeddings averaged over all the
        sub-sequences that overlap each frame.
    """
    # compute embedding on sliding window
    # over the whole duration of the file
    fX = np.vstack(list(self.from_file(current_file, incomplete=True)))

    subsequences = SlidingWindow(duration=self.duration, step=self.step)

    if not self.internal:
        return SlidingWindowFeature(fX, subsequences)

    # get total number of frames
    identifier = get_unique_identifier(current_file)
    n_frames = self.preprocessed_['X'][identifier].data.shape[0]

    # data[i] is the sum of all embeddings for frame #i
    data = np.zeros((n_frames, self.dimension), dtype=np.float32)

    # k[i] is the number of sequences that overlap with frame #i.
    # float32 rather than int8: an int8 counter wraps around as soon as
    # more than 127 sub-sequences overlap a frame, and float32 keeps the
    # division below in float32.
    k = np.zeros((n_frames, 1), dtype=np.float32)

    # frame and sub-sequence sliding windows
    frames = self.feature_extractor.sliding_window()

    for subsequence, fX_ in zip(subsequences, fX):

        # indices of frames overlapped by subsequence
        indices = frames.crop(subsequence,
                              mode='center',
                              fixed=self.duration)

        # accumulate their embedding
        data[indices] += fX_

        # keep track of the number of overlapping sequence
        k[indices] += 1

    # compute average embedding of each frame
    # (frames never covered are divided by 1)
    data = data / np.maximum(k, 1)

    return SlidingWindowFeature(data, frames)
def crop(self, current_file, segment, mode='center', fixed=None) -> np.ndarray:
    """Fast version of self(current_file).crop(segment, mode='center',
    fixed=segment.duration)

    Parameters
    ----------
    current_file : dict
        `pyannote.database` file. Must contain a 'duration' key that
        provides the duration (in seconds) of the audio file.
    segment : `pyannote.core.Segment`
        Segment from which to extract features.
    mode : str, optional
        Forwarded to the sliding window `crop`. Defaults to 'center'.
    fixed : float, optional
        Forwarded to the sliding window `crop`.

    Returns
    -------
    features : (n_frames, dimension) numpy array
        Extracted features

    See also
    --------
    `pyannote.core.SlidingWindowFeature.crop`
    """
    context = self.get_context_duration()

    # extend segment on both sides with requested context,
    # without going beyond the boundaries of the file
    extended_start = max(0, segment.start - context)
    extended_end = min(current_file['duration'], segment.end + context)
    xsegment = Segment(extended_start, extended_end)

    # obtain (augmented) waveform on this extended segment
    waveform = self.raw_audio_.crop(current_file, xsegment,
                                    mode='center',
                                    fixed=xsegment.duration)

    features = self.get_features(waveform, self.sample_rate)

    # get rid of additional context before returning: locate the
    # requested segment on frames shifted to the extended segment
    frames = self.sliding_window
    shifted_frames = SlidingWindow(start=xsegment.start - frames.step,
                                   step=frames.step,
                                   duration=frames.duration)
    ranges = shifted_frames.crop(segment, mode=mode, fixed=fixed,
                                 return_ranges=True)
    (start, end), = ranges

    # HACK for when start (returned by shifted_frames.crop) is negative
    # due to floating point precision.
    if start < 0:
        # keep the same number of frames when a fixed duration is asked
        if fixed is not None:
            end -= start
        start = 0

    return features[start:end]
def apply(self, current_file, crop=None):
    """Extract embeddings

    Can process either pyannote.database protocol items (as dict) or
    batch of precomputed feature sequences (as numpy array).

    Parameters
    ----------
    current_file : dict or numpy array
        File (from pyannote.database protocol) or batch of precomputed
        feature sequences.
    crop : Segment or Timeline, optional
        When provided, only extract corresponding embeddings.

    Returns
    -------
    embedding : SlidingWindowFeature or numpy array
        Numpy array when `current_file` is a numpy array or when `crop`
        is provided, SlidingWindowFeature otherwise.
    """
    # if current_file is in fact a batch of feature sequences
    # use postprocess_ndarray directly.
    if isinstance(current_file, np.ndarray):
        return self.postprocess_ndarray(current_file)

    # HACK: change internal SlidingSegment's source to only extract
    # embeddings on provided "crop". keep track of original source
    # to set it back before the function returns
    source = self.generator.source
    if crop is not None:
        self.generator.source = crop

    try:
        # compute embedding on sliding window
        # over the whole duration of the source
        batches = list(self.from_file(current_file, incomplete=True))
    finally:
        # restore the original source even if extraction fails, so that
        # a failed call does not leave the generator pointing at `crop`
        self.generator.source = source

    if not batches:
        fX = np.zeros((0, self.dimension))
    else:
        fX = np.vstack(batches)

    if crop is not None:
        return fX

    subsequences = SlidingWindow(duration=self.duration, step=self.step)
    return SlidingWindowFeature(fX, subsequences)
def __init__(self, root_dir=None):
    """Load sliding window and dimension from the HDF5 config file

    Parameters
    ----------
    root_dir : optional
        Stored as is on the instance, and passed to
        `self.get_config_path` to locate the HDF5 file.
    """
    super(Precomputed, self).__init__()
    self.root_dir = root_dir

    path = self.get_config_path(self.root_dir)

    # context manager: the file is closed even when an attribute is
    # missing and reading it raises
    with h5py.File(path) as f:
        start = f.attrs['start']
        duration = f.attrs['duration']
        step = f.attrs['step']
        dimension = f.attrs['dimension']

    self.sliding_window_ = SlidingWindow(start=start,
                                         duration=duration,
                                         step=step)
    self.dimension_ = dimension
def tst_iter(self):
    """Iterate over test files, split into 30-second sessions

    Yields
    ------
    session_file : dict
        Shallow copy of a file yielded by the parent `tst_iter`, whose
        'annotated' and 'annotation' entries are cropped to one session.
    """
    for current_file in super().tst_iter():
        annotated = current_file['annotated']
        annotation = current_file['annotation']

        for region in annotated:
            # back-to-back 30s sessions; the window range ends 3s before
            # the end of the annotated region
            sessions = SlidingWindow(start=region.start,
                                     duration=30.,
                                     step=30.,
                                     end=region.end - 3.)

            for session in sessions:
                yield dict(current_file,
                           annotated=annotated.crop(session),
                           annotation=annotation.crop(session))
def initialize(self, sequence):
    """Initialize internal state from the first incoming sequence

    Parameters
    ----------
    sequence : SlidingWindowFeature
        Its sliding window defines the common time base and its data
        becomes the first row of the buffer (assumed to be a
        SlidingWindowFeature since only `.sliding_window` and `.data`
        are read here).

    Returns
    -------
    Stream.NoNewData
        Nothing can be output from the first sequence alone.
    """
    time_base = sequence.sliding_window

    # common time base (copy of the incoming sliding window)
    self.frames_ = SlidingWindow(start=time_base.start,
                                 duration=time_base.duration,
                                 step=time_base.step)

    # buffer with one leading axis indexing the buffered sequences;
    # its first (and only) row is the incoming data
    first = sequence.data
    stack = np.ones((1, ) + first.shape, dtype=first.dtype)
    stack[0, :] = first
    self.buffer_ = stack

    self.initialized_ = True
    return Stream.NoNewData
def stream_audio(current_file, sample_rate=None, mono=True, duration=1.):
    """Simulate audio file streaming

    Parameters
    ----------
    current_file : dict
        Dictionary given by pyannote.database.
    sample_rate: int, optional
        Target sampling rate. Defaults to using native sampling rate.
    mono : bool, optional
        Convert multi-channel to mono. Defaults to True.
    duration : float, optional
        Buffer duration, in seconds. Defaults to 1.

    Returns
    -------
    buffer : iterable
        Yields SlidingWindowFeature instances, then yields
        `Stream.EndOfStream` forever.

    Usage
    -----
    >>> for buffer in stream_audio(current_file):
    ...     do_something_with(buffer)

    Notes
    -----
    In case `current_file` contains a `channel` key, data of this
    (1-indexed) channel will be yielded.
    """
    y, sample_rate = read_audio(current_file,
                                sample_rate=sample_rate,
                                mono=mono)

    total = len(y)
    hop = int(duration * sample_rate)

    # consecutive, non-overlapping buffers (the last one may be shorter)
    for offset in range(0, total, hop):
        data = y[offset:offset + hop, np.newaxis]
        frames = SlidingWindow(start=offset / sample_rate,
                               duration=1 / sample_rate,
                               step=1 / sample_rate)
        yield SlidingWindowFeature(data, frames)

    # once the file is exhausted, keep signalling the end of the stream
    while True:
        yield Stream.EndOfStream
def _sessionify(self, current_files):
    """Split files into 60-second sessions

    Parameters
    ----------
    current_files : iterable of dict
        Each file must provide 'annotated' and 'annotation' entries.

    Yields
    ------
    session_file : dict
        Shallow copy of the input file, whose 'annotated' and
        'annotation' entries are cropped to one session.
    """
    for current_file in current_files:
        annotated = current_file['annotated']
        annotation = current_file['annotation']

        for region in annotated:
            # back-to-back 60s sessions; the window range ends 60s
            # before the end of the annotated region
            sessions = SlidingWindow(start=region.start,
                                     duration=60.,
                                     step=60.,
                                     end=region.end - 60.)

            for session in sessions:
                yield dict(current_file,
                           annotated=annotated.crop(session),
                           annotation=annotation.crop(session))
def apply(self, current_file):
    """Computes distance between sliding windows embeddings

    Parameter
    ---------
    current_file : dict
        Passed as is to `self.from_file`.

    Returns
    -------
    predictions : SlidingWindowFeature
        Euclidean distance between left and right embeddings, on a
        sliding window of duration `2 * self.duration` and step
        `self.step`, starting at 0.
    """
    # only the first item produced by `from_file` is used
    _, left, right = next(self.from_file(current_file))

    # euclidean distance along the last axis
    delta = left - right
    squared = np.sum(delta ** 2, axis=-1)
    distances = np.sqrt(squared)

    window = SlidingWindow(duration=2 * self.duration,
                           step=self.step,
                           start=0.)
    return SlidingWindowFeature(distances, window)