def _partition(self, timeline, coverage):
    """Partition `timeline` at every segment boundary and crop to `coverage`.

    Parameters
    ----------
    timeline : Timeline
        Timeline whose segment start/end timestamps define the boundaries.
    coverage : Timeline
        Extent to which the resulting partition is cropped.

    Returns
    -------
    Annotation
        One '_'-labelled track per inter-boundary interval, cropped to
        `coverage` (intersection mode), with anonymized track names.
    """
    # boundaries (as set of timestamps)
    boundaries = set()
    for segment in timeline:
        boundaries.add(segment.start)
        boundaries.add(segment.end)
    # partition (as annotation with a dummy '_' label)
    partition = Annotation()
    for start, end in pairwise(sorted(boundaries)):
        segment = Segment(start, end)
        partition[segment] = '_'
    # fix: the original computed the crop twice (once into an unused
    # `cropped` local, once again in the return) -- do it once and reuse.
    cropped = partition.crop(coverage, mode='intersection')
    return cropped.anonymize_tracks()
def _partition(self, timeline, coverage):
    """Split `timeline` at each segment boundary, label every resulting
    interval '_', then crop to `coverage` and anonymize track names."""
    # every segment start/end timestamp is a partition boundary
    stamps = {t for seg in timeline for t in (seg.start, seg.end)}
    partition = Annotation()
    for left, right in pairwise(sorted(stamps)):
        partition[Segment(left, right)] = '_'
    cropped = partition.crop(coverage, mode='intersection')
    return cropped.anonymize_tracks()
def _xxx_try_iter(self, subset):
    """Yield one speaker-spotting trial dict per line of `{subset}.trial.txt`.

    Each yielded dict contains at least 'database', 'uri', 'try_with',
    'model_id' and 'reference'; when `self.diarization` is truthy it also
    carries full 'annotation' and 'annotated' entries.

    Parameters
    ----------
    subset : str
        Subset name; selects both the reference data and the trial file.
    """
    # load "who speaks when" reference
    data = self._load_data(subset)
    diarization = getattr(self, 'diarization', True)
    if diarization:
        AnnotationGroups = data['annotation'].groupby(by='uri')
    else:
        # grouped by (uri, speaker) so target turns can be fetched directly
        AnnotationGroups = data['annotation'].groupby(
            by=['uri', 'speaker'])
    # load trials
    data_dir = Path(__file__).parent / 'data' / 'speaker_spotting'
    trials = data_dir / f'{subset}.trial.txt'
    names = ['model_id', 'uri', 'start', 'end', 'target', 'first', 'total']
    trials = read_table(trials, delim_whitespace=True, names=names)
    for trial in trials.itertuples():
        model_id = trial.model_id
        # model_id carries a "_mXX" suffix; strip it to recover the speaker:
        # FIE038_m1 ==> FIE038
        # FIE038_m42 ==> FIE038
        # Bernard_Pivot_m1 ==> Bernard_Pivot
        speaker = '_'.join(model_id.split('_')[:-1])
        # append Mix-Headset to uri
        raw_uri = trial.uri
        uri = f'{raw_uri}.Mix-Headset'
        # trial session
        try_with = Segment(start=trial.start, end=trial.end)
        if diarization:
            # 'annotation' & 'annotated' are needed when diarization is set
            # therefore, this needs a bit more work than when set to False.
            annotation = Annotation(uri=uri)
            turns = AnnotationGroups.get_group(raw_uri)
            for t, turn in enumerate(turns.itertuples()):
                # NOTE(review): rows appear to provide start+duration
                # (not start+end) -- confirm against the reference schema
                segment = Segment(start=turn.start,
                                  end=turn.start + turn.duration)
                # keep only turns overlapping the trial session
                if not (segment & try_with):
                    continue
                annotation[segment, t] = turn.speaker
            annotation = annotation.crop(try_with)
            reference = annotation.label_timeline(speaker)
            annotated = Timeline(uri=uri, segments=[try_with])
            # pack & yield trial
            current_trial = {
                'database': 'Test',
                'uri': uri,
                'try_with': try_with,
                'model_id': model_id,
                'reference': reference,
                'annotation': annotation,
                'annotated': annotated,
            }
        else:
            # 'annotation' & 'annotated' are not needed when diarization is
            # set to False -- leading to a faster implementation...
            segments = []
            if trial.target == 'target':
                turns = AnnotationGroups.get_group((raw_uri, speaker))
                for t, turn in enumerate(turns.itertuples()):
                    segment = Segment(start=turn.start,
                                      end=turn.start + turn.duration)
                    segments.append(segment)
            # non-target trials get an empty reference timeline
            reference = Timeline(uri=uri, segments=segments).crop(try_with)
            # pack & yield trial
            current_trial = {
                'database': 'Test',
                'uri': uri,
                'try_with': try_with,
                'model_id': model_id,
                'reference': reference,
            }
        yield current_trial
# NOTE(review): fragment -- this code starts mid-function; the enclosing
# definition and the loop that populates dic_trackID_st_to_speakingFace are
# outside this view, so the reconstructed indentation of the first statement
# is a best guess.
dic_trackID_st_to_speakingFace[int(TrackID_st)] = [int(TrackID_Face), float(proba)]
# map each speaking-face track ID to the associated named-speaker label
trackID_face_to_name = {}
for s, t, name in NamedSpk.itertracks(label=True):
    # '' marks "no associated face track" -- presumably set upstream; verify
    if dic_trackID_st_to_speakingFace[t][0] != '':
        trackID_face_to_name[dic_trackID_st_to_speakingFace[t][0]] = name
# keep only face tracks that received a name
namedFaces = Annotation(uri=videoID)
for s, t, faceID in faces.itertracks(label=True):
    if t in trackID_face_to_name:
        namedFaces[s, t] = trackID_face_to_name[t]
# write person visible and speaking in a shot:
for sshot, tshot, shot in shots.itertracks(label=True):
    NamedSpkShot = NamedSpk.crop(sshot)
    NamedFaceShot = namedFaces.crop(sshot)
    # persons both heard (named speaker) and seen (named face) in this shot
    PersonShot = set(NamedSpkShot.labels()) & set(NamedFaceShot.labels())
    for p in (PersonShot & set(evidences.keys())):
        # confidence: higher when an ON (overlay name) segment overlaps the
        # speech turn; otherwise inversely proportional to their gap
        conf = 0.0
        for sSpk in NamedSpkShot.label_timeline(p):
            for sON, tON, name in ON.itertracks(label=True):
                if name == p:
                    sInter = sON & sSpk
                    if sInter :
                        c = 1.0 + sInter.duration
                    else:
                        sDist = sON ^ sSpk
                        # zero-length gap: treat as full confidence
                        if sDist.duration == 0 :
                            c=1.0
                        else:
                            c = 1/sDist.duration
                    # keep the best confidence over all ON segments
                    if c > conf:
                        conf = c
def gecko_JSON_to_Annotation(gecko_JSON, uri=None, modality='speaker',
                             confidence_threshold=0.0, collar=0.0,
                             expected_min_speech_time=0.0, manual=False):
    """
    Parameters:
    -----------
    gecko_JSON : `dict`
        loaded from a Gecko-compliant JSON as defined in xml_to_GeckoJSON
    uri (uniform resource identifier) : `str`
        which identifies the annotation (e.g. episode number)
        Default : None
    modality : `str`
        modality of the annotation as defined in
        https://github.com/pyannote/pyannote-core
    confidence_threshold : `float`, Optional.
        The segments with confidence under confidence_threshold won't be added
        to the UEM file. Defaults to keep every segment (i.e. 0.0)
    collar : `float`, Optional.
        Merge tracks with same label and separated by less than `collar`
        seconds. Defaults to keep tracks timeline untouched (i.e. 0.0)
    expected_min_speech_time : `float`, Optional.
        Threshold (in seconds) under which the total duration of speech time
        is suspicious (warns the user). Defaults to never suspect anything
        (i.e. 0.0)
    manual : `bool`
        Whether the json is coming from a manual correction or straight from
        the forced-alignment output. In the former case, the regions timing is
        used. `confidence_threshold` and `collar` are thus irrelevant. In the
        latter case (default), the timing of each term is used.

    Returns:
    --------
    annotation : pyannote `Annotation`
        for speaker identification/diarization as defined in
        https://github.com/pyannote/pyannote-core
    annotated : pyannote `Timeline`
        representing the annotated parts of the gecko_JSON files (depends on
        confidence_threshold)
    """
    annotation = Annotation(uri, modality)
    not_annotated = Timeline(uri=uri)
    for monologue in gecko_JSON["monologues"]:
        if not monologue:
            continue
        # '@' defined in https://github.com/hbredin/pyannote-db-plumcot/blob/develop/CONTRIBUTING.md#idepisodetxt
        # '+' defined in https://github.com/gong-io/gecko/blob/master/app/geckoModule/constants.js#L35
        # fix: raw string -- "\+" in a non-raw literal is an invalid escape
        # sequence (SyntaxWarning on recent Python versions)
        speaker_ids = re.split(r"@|\+", monologue["speaker"]["id"])
        if manual:
            # trust the (manually corrected) region timing
            for speaker_id in speaker_ids:  # most of the time there's only one
                if speaker_id != '':  # happens with "all@"
                    annotation[Segment(monologue["start"], monologue["end"]),
                               speaker_id] = speaker_id
        else:
            # trust the per-term forced-alignment timing
            for i, term in enumerate(monologue["terms"]):
                for speaker_id in speaker_ids:  # most of the time there's only one
                    if speaker_id != '':  # happens with "all@"
                        annotation[Segment(term["start"], term["end"]),
                                   speaker_id] = speaker_id
                # low-confidence terms are excluded from the annotated parts
                if term["confidence"] <= confidence_threshold:
                    not_annotated.add(Segment(term["start"], term["end"]))
    # NOTE(review): `monologue` (and `term` when manual=False) refer to the
    # last loop items; this raises NameError when "monologues" is empty --
    # confirm callers never pass an empty JSON.
    if manual:
        annotated = Timeline([Segment(0.0, monologue["end"])], uri)
    else:
        annotation = annotation.support(collar)
        annotated = not_annotated.gaps(support=Segment(0.0, term["end"]))
    total_speech_time = annotation.crop(annotated).get_timeline().duration()
    if total_speech_time < expected_min_speech_time:
        # fix: dropped the stray ')' that unbalanced the warning message
        warnings.warn(
            f"total speech time of {uri} is only {total_speech_time}")
    return annotation, annotated
def gecko_JSON_to_UEM(gecko_JSON, uri=None, modality='speaker',
                      confidence_threshold=0.0, collar=0.0,
                      expected_min_speech_time=0.0):
    """
    Parameters:
    -----------
    gecko_JSON : `dict`
        loaded from a Gecko-compliant JSON as defined in xml_to_GeckoJSON
    uri (uniform resource identifier) : `str`
        which identifies the annotation (e.g. episode number)
        Default : None
    modality : `str`
        modality of the annotation as defined in
        https://github.com/pyannote/pyannote-core
    confidence_threshold : `float`, Optional.
        The segments with confidence under confidence_threshold won't be added
        to the UEM file. Defaults to keep every segment (i.e. 0.0)
    collar : `float`, Optional.
        Merge tracks with same label and separated by less than `collar`
        seconds. Defaults to keep tracks timeline untouched (i.e. 0.0)
    expected_min_speech_time : `float`, Optional.
        Threshold (in seconds) under which the total duration of speech time
        is suspicious (warns the user). Defaults to never suspect anything
        (i.e. 0.0)

    Returns:
    --------
    annotation : pyannote `Annotation`
        for speaker identification/diarization as defined in
        https://github.com/pyannote/pyannote-core
    annotated : pyannote `Timeline`
        representing the annotated parts of the gecko_JSON files (depends on
        confidence_threshold)
    """
    annotation = Annotation(uri, modality)
    annotated = Timeline(uri=uri)
    # running bounds of the confident / unconfident stretches seen so far
    last_confident = 0.0
    last_unconfident = 0.0
    for monologue in gecko_JSON["monologues"]:
        if not monologue:
            continue
        # '@' defined in https://github.com/hbredin/pyannote-db-plumcot/blob/develop/CONTRIBUTING.md#idepisodetxt
        # '+' defined in https://github.com/gong-io/gecko/blob/master/app/geckoModule/constants.js#L35
        # fix: raw string -- "\+" in a non-raw literal is an invalid escape
        # sequence (SyntaxWarning on recent Python versions)
        speaker_ids = re.split(r"@|\+", monologue["speaker"]["id"])
        for i, term in enumerate(monologue["terms"]):
            # normalize to float; confidence may be missing entirely
            term["confidence"], term["start"], term["end"] = map(
                float,
                (term.get("confidence", 0.), term["start"], term["end"]))
            unknown = False
            for speaker_id in speaker_ids:  # most of the time there's only one
                if '#unknown#' in speaker_id:
                    unknown = True
                if speaker_id != '':  # happens with "all@"
                    annotation[Segment(term["start"], term["end"]),
                               speaker_id] = speaker_id
            if term["confidence"] <= confidence_threshold:
                last_unconfident = term["end"]
            else:
                # NOTE(review): a trailing confident stretch (after the last
                # unconfident term) is never added to `annotated` -- confirm
                # this is intended.
                if last_unconfident < last_confident and not unknown:
                    annotated.add(Segment(last_confident, term["end"]))
                last_confident = term["start"]
    annotation = annotation.support(collar)
    total_speech_time = annotation.crop(annotated).get_timeline().duration()
    if total_speech_time < expected_min_speech_time:
        # fix: dropped the stray ')' that unbalanced the warning message
        warnings.warn(
            f"total speech time of {uri} is only {total_speech_time}")
    return annotation, annotated.support()
def tst_try_iter(self):
    """Yield one speaker-spotting trial dict per line of `tst.trial.txt`.

    Each yielded dict contains at least 'database', 'uri', 'try_with',
    'model_id' and 'reference'; when `self.diarization` is truthy it also
    carries full 'annotation' and 'annotated' entries.
    """

    def get_turns(uri):
        # ground-truth "who speaks when" table for this uri
        ref_file_path = Path(__file__).parent / 'data' / 'speaker_diarization' / uri
        ref_file_path = Path(str(ref_file_path) + '.txt')
        gt_names = ['start', 'end', 'speaker', 'speakerID']
        # fix: `ref_file_path` is already absolute, so the original
        # os.path.join(data_dir, ref_file_path) was a no-op that also read
        # a closure variable defined only later -- use the path directly.
        return read_table(ref_file_path, delim_whitespace=True,
                          names=gt_names)

    diarization = getattr(self, 'diarization', True)
    # load trials
    data_dir = Path(__file__).parent / 'data' / 'speaker_spotting'
    trials = data_dir / 'tst.trial.txt'
    names = ['model_id', 'uri', 'start', 'end', 'target', 'first', 'total']
    trials = read_table(trials, delim_whitespace=True, names=names)
    for trial in trials.itertuples():
        model_id = trial.model_id
        speaker = model_id
        uri = trial.uri
        # trial session
        try_with = Segment(start=trial.start, end=trial.end)
        if diarization:
            # 'annotation' & 'annotated' are needed when diarization is set
            # therefore, this needs a bit more work than when set to False.
            annotation = Annotation(uri=uri)
            turns = get_turns(uri)
            for t, turn in enumerate(turns.itertuples()):
                segment = Segment(start=turn.start, end=turn.end)
                # keep only turns overlapping the trial session
                if not (segment & try_with):
                    continue
                annotation[segment, t] = turn.speakerID
            annotation = annotation.crop(try_with)
            reference = annotation.label_timeline(speaker)
            annotated = Timeline(uri=uri, segments=[try_with])
            # pack & yield trial
            current_trial = {
                'database': 'Odessa',
                'uri': uri,
                'try_with': try_with,
                'model_id': model_id,
                'reference': reference,
                'annotation': annotation,
                'annotated': annotated,
            }
        else:
            # 'annotation' & 'annotated' are not needed when diarization is
            # set to False -- leading to a faster implementation...
            segments = []
            if trial.target == 'target':
                turns = get_turns(uri).groupby(by='speakerID')
                for t, turn in enumerate(
                        turns.get_group(speaker).itertuples()):
                    segment = Segment(start=turn.start, end=turn.end)
                    segments.append(segment)
            # non-target trials get an empty reference timeline
            reference = Timeline(uri=uri, segments=segments).crop(try_with)
            # pack & yield trial
            current_trial = {
                'database': 'Odessa',
                'uri': uri,
                'try_with': try_with,
                'model_id': model_id,
                'reference': reference,
            }
        yield current_trial