def transcription():
    """Build a small sample transcription (Penny meets Leonard).

    Mixes anchored times (floats), drifting times (``T('A')`` ...) and
    edges with/without speech, speaker and summary attributes.
    """
    # (source, target, edge attributes), added in this exact order so that
    # drifting labels are instantiated in a deterministic sequence
    edge_specs = [
        (3, 5, {'speech': "hi there, I'm Penny", 'speaker': 'Penny'}),
        (5, 5.5, {}),
        (5.5, 7, {'speech': "hi. I'm Leonard", 'speaker': 'Leonard'}),
        (T('A'), T('B'), {'summary': 'Leonard meets Penny'}),
        (7, T('C'), {}),
    ]
    sample = Transcription()
    for source, target, attrs in edge_specs:
        sample.add_edge(source, target, **attrs)
    return sample
def __call__(self, vtranscription, htranscription, vattribute=None, hattribute=None):
    """Align two transcriptions

    Parameters
    ----------
    vtranscription, htranscription : `Transcription`
    vattribute, hattribute : str

    Returns
    -------
    merged : `Transcription`
    """
    # Drifting labels must not collide between the two transcriptions.
    # Remember the `vertical` relabeling so that its original drifting
    # times can be restored after the merge.
    T.reset()
    vtranscription, vertical_mapping = vtranscription.relabel_drifting_nodes()
    htranscription, _ = htranscription.relabel_drifting_nodes()

    # build one (index, item) sequence per transcription and compute
    # the pairwise distance matrix between them
    v_seq = self._get_sequence(vtranscription, attribute=vattribute)
    h_seq = self._get_sequence(htranscription, attribute=hattribute)
    dist_matrix = self.pairwise_distance(v_seq, h_seq)

    # DTW alignment of the two index sequences, then merge accordingly
    v_index, _ = six.moves.zip(*v_seq)
    h_index, _ = six.moves.zip(*h_seq)
    dtw_alignment = self._dtw.get_alignment(v_index, h_index, distance=dist_matrix)
    combined = self.merge(vtranscription, htranscription, dtw_alignment)

    # restore original `vertical` drifting times
    # (only those that were not anchored in the meantime)
    restored, _ = combined.relabel_drifting_nodes(vertical_mapping)
    return restored
def _duration(self, lines, start, end):
    """Iterate over dialogue lines with (estimated) time ranges.

    When ``self.duration`` is true, the span from `start` to `end` is
    split proportionally to each line's character count (+1 for the
    separator). Otherwise, every intermediate boundary is a fresh
    drifting time ``T()`` and only the final boundary is `end`.

    Yields
    ------
    (line, start_time, end_time) triples, consecutive and contiguous.
    """
    if self.duration:
        # cumulative character weights -> fraction of the span elapsed
        # at the end of each line (1. * forces float division on py2)
        weights = np.array([len(text) + 1 for text in lines])
        fractions = 1. * np.cumsum(weights) / np.sum(weights)
        boundaries = start + (end - start) * fractions
    else:
        # unknown timing: drifting boundary after every line but the last
        boundaries = [T() for _ in lines[:-1]] + [end]

    current = start
    for text, boundary in zip(lines, boundaries):
        yield text, current, boundary
        current = boundary
def __call__(self, vtranscription, htranscription, vattribute=None, hattribute=None):
    """Align two transcriptions

    Parameters
    ----------
    vtranscription, htranscription : `Transcription`
    vattribute, hattribute : str

    Returns
    -------
    merged : `Transcription`
    """
    # make sure transcriptions do not share any drifting labels
    # and also keep track of `vertical` mapping so that we can
    # retrieve original `vertical` drifting times at the end
    T.reset()
    vtranscription, vmapping = vtranscription.relabel_drifting_nodes()
    htranscription, _ = htranscription.relabel_drifting_nodes()

    # compute distance matrix
    vsequence = self._get_sequence(vtranscription, attribute=vattribute)
    hsequence = self._get_sequence(htranscription, attribute=hattribute)
    distance = self.pairwise_distance(vsequence, hsequence)

    # align and merge
    # FIX: `itertools.izip` does not exist on Python 3; use the
    # `six.moves.zip` compatibility alias, consistent with the other
    # aligner implementation in this file
    vindex, _ = six.moves.zip(*vsequence)
    hindex, _ = six.moves.zip(*hsequence)
    alignment = self._dtw.get_alignment(vindex, hindex, distance=distance)
    merged = self.merge(vtranscription, htranscription, alignment)

    # retrieve original `vertical` drifting times
    # in case they have not been anchored
    relabeled, _ = merged.relabel_drifting_nodes(vmapping)
    return relabeled
def get_resource_from_plugin(self, resource_type, episode):
    """Load resource from plugin, store it in memory and return it

    Parameters
    ----------
    episode : Episode
        Episode
    resource_type : str
        Type of resource

    Returns
    -------
    resource : Timeline, Annotation or Transcription
        Resource of type `resource_type` for requested episode

    Raises
    ------
    Exception
        If plugin failed to provide the requested resource
    """
    logging.debug(
        'getting {t:s} for {e!s} from plugin'.format(
            e=episode, t=resource_type))

    # registered plugin entry for this (episode, resource_type) pair
    entry = self.resources[episode][resource_type]

    # drifting-time labels restart from scratch for every plugin call
    T.reset()
    result = entry['method'](**entry['params'])

    logging.debug(
        'saving {t:s} for {e!s} into memory'.format(
            e=episode, t=resource_type))

    # cache the result in the same registry entry
    entry['result'] = result

    return result
def read(self, path, **kwargs):
    """Parse a time-marked word file (CTM-style) into a `Transcription`.

    Each non-comment line is whitespace-split; fields[2] is the start
    time, fields[3] the duration, fields[4] the word and fields[5] its
    confidence. Words with positive duration become anchored edges
    T(start) -> T(end); zero-duration words are spliced into the most
    recent anchored edge with drifting times T().
    """
    # drifting-time labels restart from scratch for every file
    T.reset()
    transcription = Transcription()
    previousNode = TStart
    # `arc` tracks the most recent anchored word edge:
    # [anchor_start, anchor_end, drifting_node, drifting_node, ...]
    arc = []
    for line in self.iterlines(path, encoding='utf-8'):
        # skip comments
        if re.search(r'^;;', line):
            continue
        fields = line.strip().split()
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        word = fields[4]
        # remove punctuations
        if not self.punctuation:
            word = re.sub(r'[\.!,;?":]+', ' ', word)
            word = re.sub(r' +', ' ', word)
            word = word.strip()
        # token was punctuation only -- nothing left to add
        if not word:
            continue
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        confidence = float(fields[5])
        start_time = round(float(fields[2]), 3)
        duration = round(float(fields[3]), 3)
        end_time = float(start_time) + float(duration)
        end_time = round(end_time, 3)
        if duration == 0:
            # zero-duration word: no usable timestamps; insert it with
            # drifting times inside the last anchored edge
            node_start = previousNode
            node_end = T()
            if len(arc) == 2:
                # first drifting insertion: replace arc[0] -> arc[1] by
                # arc[0] -> node_end -> node_inter -> arc[1]
                # NOTE(review): `arc_data` is only assigned in the
                # duration > 0 branch below; a zero-duration word seen
                # before any positive-duration word raises NameError here.
                transcription.remove_edge(arc[0], arc[1])
                transcription.add_edge(arc[0], node_end, **arc_data)
                node_inter = T()
                transcription.add_edge(node_end, node_inter, speech=word, confidence=confidence)
                transcription.add_edge(node_inter, arc[1])
                arc.append(node_end)
                arc.append(node_inter)
                node_end = arc[1]
            elif len(arc) > 2:
                # subsequent drifting insertions: chain the new word just
                # before the anchored end of the arc
                node_anc_start = arc[0]  # NOTE(review): assigned but unused
                node_anc_end = arc[1]
                transcription.remove_edge(arc[len(arc) - 1], node_anc_end)
                transcription.add_edge(arc[len(arc) - 1], node_end, speech=word, confidence=confidence)
                transcription.add_edge(node_end, node_anc_end)
                arc.append(node_end)
                node_end = arc[1]
        else:
            addEdge = True
            node_start = T(start_time)
            node_end = T(end_time)
            if previousNode.drifting:
                # connect the drifting tail of earlier words to this word
                if not transcription.has_edge(previousNode, node_start):
                    transcription.add_edge(previousNode, node_start)
            else:
                if node_start < previousNode:
                    # overlap with previous word: clip to its end time
                    node_start = previousNode
                elif node_start > previousNode:
                    # gap between words: bridge it with an empty edge
                    transcription.add_edge(previousNode, node_start)
            if node_start.anchored and node_end.anchored:
                if node_start == node_end:
                    # rounding collapsed the word to zero length: fall back
                    # to the same drifting-insertion scheme as duration == 0
                    addEdge = False
                    node_start = previousNode
                    node_end = T()
                    if len(arc) == 2:
                        transcription.remove_edge(arc[0], arc[1])
                        transcription.add_edge(arc[0], node_end, **arc_data)
                        node_inter = T()
                        transcription.add_edge(node_end, node_inter, speech=word, confidence=confidence)
                        transcription.add_edge(node_inter, arc[1])
                        arc.append(node_end)
                        arc.append(node_inter)
                        node_end = arc[1]
                    elif len(arc) > 2:
                        node_anc_start = arc[0]
                        node_anc_end = arc[1]
                        transcription.remove_edge(arc[len(arc) - 1], node_anc_end)
                        transcription.add_edge(arc[len(arc) - 1], node_end, speech=word, confidence=confidence)
                        transcription.add_edge(node_end, node_anc_end)
                        arc.append(node_end)
                        node_end = arc[1]
                else:
                    # regular anchored word: remember its edge (and data)
                    # for later zero-duration splicing
                    arc = [node_start, node_end]
                    arc_data = {'speech': word, 'confidence': confidence}
            if addEdge:
                transcription.add_edge(node_start, node_end, speech=word, confidence=confidence)
        previousNode = node_end
    # close the graph on the global end-of-file marker
    transcription.add_edge(previousNode, TEnd)
    self._loaded = transcription
    return self
def get_transcription(self, path2ctm):
    """Load a CTM file as a `Transcription` graph.

    Words with positive duration become anchored edges; consecutive
    words are clipped or bridged so times never go backwards.

    NOTE(review): the zero/negative-duration branch below appears to be
    pasted from the `read` method without adaptation -- it references
    `previousNode`, `arc` and `arc_data`, none of which is initialized
    in this function, so the first word with duration <= 0 raises
    NameError. Similarly, the final TEnd edge uses `previousNode`.
    """
    # drifting-time labels restart from scratch for every file
    T.reset()
    transcription = Transcription()
    with codecs.open(path2ctm, "rt", encoding='utf8') as f:
        lines = f.readlines()
    # last word
    previous = (None, TStart)
    # last word with duration greater than 0
    previous_with_duration = (None, TStart)
    for line in lines:
        # skip comments
        if re.search(r'^;;', line):
            continue
        fields = line.strip().split()
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        word = fields[4]
        # remove punctuations
        if not self.punctuation:
            word = re.sub(r'[\.!,;?":]+', ' ', word)
            word = re.sub(r' +', ' ', word)
            word = word.strip()
        if not word:
            continue
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # NOTE(review): kept as str here (read() casts it to float)
        confidence = fields[5]
        start_time = round(float(fields[2]), 3)
        duration = round(float(fields[3]), 3)
        end_time = float(start_time) + float(duration)
        end_time = round(end_time, 3)
        if duration > 0.:
            start = T(start_time)
            end = T(end_time)
            # if current word starts before the end of previous word,
            # make it start exactly at the end of previous word
            if start < previous_with_duration[1]:
                start = previous_with_duration[1]
            # if current word starts after the end of previous word,
            # add an empty edge
            if start > previous_with_duration[1]:
                transcription.add_edge(previous_with_duration[1], start)
            # add word and update `previous`
            transcription.add_edge(start, end, speech=word, confidence=confidence)
            previous_with_duration = (start, end)
            previous = (start, end)
        else:
            # NOTE(review): broken branch (see docstring); `start`/`end`
            # below are computed but never used
            start = previous
            end = T()
            if duration == 0:
                node_start = previousNode
                node_end = T()
                if len(arc) == 2:
                    transcription.remove_edge(arc[0], arc[1])
                    transcription.add_edge(arc[0], node_end, **arc_data)
                    node_inter = T()
                    transcription.add_edge(node_end, node_inter, speech=word, confidence=confidence)
                    transcription.add_edge(node_inter, arc[1])
                    arc.append(node_end)
                    arc.append(node_inter)
                    node_end = arc[1]
                elif len(arc) > 2:
                    node_anc_start = arc[0]  # NOTE(review): unused
                    node_anc_end = arc[1]
                    transcription.remove_edge(arc[len(arc) - 1], node_anc_end)
                    transcription.add_edge(arc[len(arc) - 1], node_end, speech=word, confidence=confidence)
                    transcription.add_edge(node_end, node_anc_end)
                    arc.append(node_end)
                    node_end = arc[1]
            else:
                addEdge = True
                node_start = T(start_time)
                node_end = T(end_time)
                if previousNode.drifting:
                    if not transcription.has_edge(previousNode, node_start):
                        transcription.add_edge(previousNode, node_start)
                else:
                    if node_start < previousNode:
                        node_start = previousNode
                    elif node_start > previousNode:
                        transcription.add_edge(previousNode, node_start)
                if node_start.anchored and node_end.anchored:
                    if node_start == node_end:
                        addEdge = False
                        node_start = previousNode
                        node_end = T()
                        if len(arc) == 2:
                            transcription.remove_edge(arc[0], arc[1])
                            transcription.add_edge(arc[0], node_end, **arc_data)
                            node_inter = T()
                            transcription.add_edge(node_end, node_inter, speech=word, confidence=confidence)
                            transcription.add_edge(node_inter, arc[1])
                            arc.append(node_end)
                            arc.append(node_inter)
                            node_end = arc[1]
                        elif len(arc) > 2:
                            node_anc_start = arc[0]
                            node_anc_end = arc[1]
                            transcription.remove_edge(arc[len(arc) - 1], node_anc_end)
                            transcription.add_edge(arc[len(arc) - 1], node_end, speech=word, confidence=confidence)
                            transcription.add_edge(node_end, node_anc_end)
                            arc.append(node_end)
                            node_end = arc[1]
                    else:
                        arc = [node_start, node_end]
                        arc_data = {'speech': word, 'confidence': confidence}
                if addEdge:
                    transcription.add_edge(node_start, node_end, speech=word, confidence=confidence)
            previousNode = node_end
    # close the graph on the global end-of-file marker
    transcription.add_edge(previousNode, TEnd)
    return transcription
def __call__(self, words, sentences):
    """Align a word-level transcription onto a sentence-level one.

    Walks the sentence edges in order and, for each sentence carrying
    `speech`, consumes word nodes (in topological order) until the
    concatenated, cleaned words match the cleaned sentence; each matched
    word edge is copied into `sentences` together with the sentence's
    `speaker`.

    Parameters
    ----------
    words : `pyannote.core.Transcription`
    sentences : `pyannote.core.Transcription`

    Returns
    -------
    sentences : `pyannote.core.Transcription`
        Word-aligned sentences, or None when a sentence cannot be
        aligned (word nodes exhausted).
    """
    lastIndexNode = 0
    end = False
    T.reset()
    sentences, mapping = sentences.relabel_drifting_nodes()
    words, _ = words.relabel_drifting_nodes()
    sentences, mapping = sentences.relabel_drifting_nodes(mapping=mapping)
    # FIX: wrap in list() so the result is indexable -- a no-op with
    # networkx 1.x (already a list), required with networkx >= 2 where
    # topological_sort returns a generator
    nodesWords = list(nx.topological_sort(words))
    # skip the global start marker if it comes first
    if nodesWords[lastIndexNode] == TStart:
        lastIndexNode += 1
    # (`last`, `next`) remember a partially consumed word edge spanning
    # a sentence boundary; -1 means "none pending"
    # NOTE(review): `next` shadows the builtin
    last = -1
    next = -1
    first_node = None
    first = -1  # NOTE(review): assigned but never used
    for t1, t2, data in sentences.ordered_edges_iter(data=True):
        # only sentence edges carrying speech need alignment
        if 'speech' not in data:
            continue
        sentence = data['speech']
        speaker = data['speaker']
        sentenceClean = self._clean_sentence(sentence)
        if not self.punctuation:
            sentenceClean = re.sub(r'[\.!,;?":]+', '', sentenceClean)
        if sentenceClean != "":
            sentenceWords = ""
            if lastIndexNode < len(nodesWords):
                # link the first sentence node to the first word node
                if first_node is None and t1 != TStart:
                    first_node = t1
                    sentences.add_edge(first_node, nodesWords[lastIndexNode])
                node_manual_trs_start = t1
                node_manual_trs_end = t2
                # NOTE(review): unused, but T() has the side effect of
                # allocating a drifting label -- kept to preserve labeling
                node_float = T()
                # flush the leftover of a word edge consumed across the
                # previous sentence boundary, if any
                remainingData = None
                if last > 0 and next > 0:
                    for key in words[last][next]:
                        dataWord = words[last][next][key]
                        if 'speech' in dataWord:
                            remainingData = dataWord
                            sentenceWords = remainingData['speech']
                            sentenceWords = self._clean_sentence(sentenceWords)
                    last = -1
                    next = -1
                bAlreadyAdded = False
                if remainingData is not None:
                    if 'speech' in remainingData:
                        remainingData['speaker'] = speaker
                        sentences.add_edge(node_manual_trs_start,
                                           nodesWords[lastIndexNode],
                                           **remainingData)
                        if sentenceWords == sentenceClean:
                            # leftover alone covers the whole sentence
                            sentences.add_edge(nodesWords[lastIndexNode],
                                               node_manual_trs_end)
                            bAlreadyAdded = True
                if not bAlreadyAdded:
                    if not sentences.has_edge(node_manual_trs_start,
                                              nodesWords[lastIndexNode]):
                        sentences.add_edge(node_manual_trs_start,
                                           nodesWords[lastIndexNode])
                    node_end = ""
                    previousNode = None
                    # consume word nodes until the accumulated words match
                    # the cleaned sentence
                    while not end and lastIndexNode < len(nodesWords):
                        node = nodesWords[lastIndexNode]
                        for node2 in sorted(words.successors(node)):
                            node_start = node
                            node_end = node2
                            if previousNode is not None:
                                if not sentences.has_edge(previousNode, node_start) \
                                        and previousNode != node_start:
                                    sentences.add_edge(previousNode, node_start)
                            for key in words[node][node2]:
                                dataWord = words[node][node2][key]
                                if 'speech' in dataWord:
                                    dataWord['speaker'] = speaker
                                    sentences.add_edge(node_start, node_end, **dataWord)
                                if 'speech' in dataWord:
                                    if sentenceWords == "":
                                        sentenceWords = dataWord['speech']
                                    else:
                                        sentenceWords += " " + dataWord['speech']
                                    sentenceWords = self._clean_sentence(sentenceWords)
                            if sentenceWords == sentenceClean:
                                if re.search(r'[\.!,;?":]$', sentenceClean):
                                    # Have to add the next anchored just before
                                    # the end of the speech turn ...
                                    lastIndexNode += 2
                                    if lastIndexNode < len(nodesWords):
                                        node = nodesWords[lastIndexNode]
                                        if node.anchored:
                                            sentences.add_edge(node_end, node)
                                            node_end = node
                                            lastIndexNode -= 1
                                        else:
                                            lastIndexNode -= 2
                                end = True
                            previousNode = node_end
                        lastIndexNode += 1
                    # remember the word edge spanning the sentence boundary
                    # so the next sentence can pick up its remainder
                    if lastIndexNode + 1 < len(nodesWords):
                        last = nodesWords[lastIndexNode]
                        next = nodesWords[lastIndexNode + 1]
                    lastIndexNode += 1
                    sentences.add_edge(node_end, node_manual_trs_end)
                    end = False
            elif sentenceClean != "":
                # word nodes exhausted but a sentence remains: give up
                # FIX: parenthesized print works on both Python 2 and 3
                # (the original `print "..."` statement is a Python 3
                # syntax error)
                print("Unable to align '%s' !" % (sentenceClean))
                return None
    return sentences