def test_corpus_dump(self):
    """Round-trip a corpus through dump()/reload and verify that every
    meta-index table survives unchanged."""
    corpus1 = Corpus(utterances=[
        Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
        Utterance(id="1", text="my name is bob", speaker=Speaker(id="bob")),
        Utterance(id="2", text="this is a test", speaker=Speaker(id="charlie")),
    ])
    corpus1.get_utterance("0").meta['foo'] = 'bar'
    corpus1.get_utterance("1").meta['foo'] = 'bar2'
    corpus1.get_utterance("2").meta['hey'] = 'jude'
    corpus1.get_conversation(None).meta['convo_meta'] = 1
    corpus1.get_speaker("alice").meta['surname'] = 1.0

    corpus1.dump('test_index_meta_corpus', base_path="./")
    corpus2 = Corpus(filename="test_index_meta_corpus")

    # every index table of the reloaded corpus must match the original
    for index_attr in ("utterances_index", "speakers_index",
                      "conversations_index", "overall_index"):
        self.assertEqual(getattr(corpus1.meta_index, index_attr),
                         getattr(corpus2.meta_index, index_attr))
def test_key_insertion_deletion(self):
    """Meta keys register their value's type in the index; deleting a key
    from one object purges it from the index and its siblings."""
    corpus1 = Corpus(utterances=[
        Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
        Utterance(id="1", text="my name is bob", speaker=Speaker(id="bob")),
        Utterance(id="2", text="this is a test", speaker=Speaker(id="charlie")),
    ])
    corpus1.get_utterance("0").meta['foo'] = 'bar'
    corpus1.get_utterance("1").meta['foo'] = 'bar2'
    corpus1.get_utterance("2").meta['hey'] = 'jude'
    corpus1.get_conversation(None).meta['convo_meta'] = 1
    corpus1.get_speaker("alice").meta['surname'] = 1.0

    self.assertEqual(corpus1.meta_index.utterances_index['foo'],
                     str(type('bar')))
    self.assertEqual(corpus1.meta_index.conversations_index['convo_meta'],
                     str(type(1)))
    self.assertEqual(corpus1.meta_index.speakers_index['surname'],
                     str(type(1.0)))

    # deleting a key from an utterance removes it from the index
    del corpus1.get_utterance("2").meta['hey']
    with self.assertRaises(KeyError):
        corpus1.meta_index.utterances_index['hey']

    # deleting a key from an utterance removes it from the index and from
    # all other objects of the same type
    del corpus1.get_utterance("1").meta['foo']
    with self.assertRaises(KeyError):
        corpus1.meta_index.utterances_index['foo']
    with self.assertRaises(KeyError):
        corpus1.get_utterance("0").meta["foo"]
def transform(self, corpus: Corpus) -> Corpus:
    """Computes the count of pause and hesitancy words for each utterance,
    then aggregates them for each conversation.

    Per-utterance counts are stored under ``self.NAME1`` (pause) and
    ``self.NAME2`` (hesitancy); per-conversation averages go under
    ``self.NAME3`` and ``self.NAME4``.

    :param corpus: the corpus to compute features for.
    :type corpus: Corpus
    :return: the same corpus, annotated in place.
    """
    if self.verbose:
        print("Finding counts of pause and hesitancy words...")
    # sets (not lists) for O(1) membership tests.
    # NOTE(review): 'uh huh' and '...' can never match a whitespace-split,
    # punctuation-stripped token, and "don't"/"won't" lose their apostrophe
    # during cleaning — preserved as-is from the original word lists.
    pause_words = {
        'um', 'umm', 'ummm', 'uh', 'uhh', 'uhhh', 'hm', 'hmm', 'hmmm', 'er',
        'err', 'uh huh', 'huh', 'mhm', 'mhmm', 'erm', '...', 'ah', 'ahh',
        'ahem', 'eh', 'ehh', 'ehhh', 'meh'
    }
    hesitant_words = {
        'maybe', 'not', 'sure', 'unsure', 'probably', 'well', 'okay', 'like',
        'actually', 'basically', 'seriously', 'totally', 'literally', 'know',
        'mean', 'guess', 'suppose', 'but', 'something', 'so', 'wow', 'just',
        'really', 'later', 'wait', 'future', 'almost', 'slightly', 'perhaps',
        'somehow', 'sort', 'kind', 'little', 'somewhat', 'hey', 'alas', 'see',
        'sounds', 'ok', 'roughly', 'why', 'how', 'yep', 'yup', 'may',
        'possibly', 'might', 'could', 'doubt', 'skeptical', 'don\'t',
        'won\'t', 'nah'
    }
    # strip punctuation in a single C-level pass instead of a per-char loop
    punct_table = str.maketrans('', '', "!.:?',\"@#$%^&*()-~`_+=><[]{}")

    pause_counts = {}      # utterance id -> number of pause words
    hesitancy_counts = {}  # utterance id -> number of hesitant words
    for utter_id in corpus.get_utterance_ids():
        text = corpus.get_utterance(utter_id).text
        tokens = text.translate(punct_table).split()
        npause = sum(1 for tok in tokens if tok in pause_words)
        nhesitant = sum(1 for tok in tokens if tok in hesitant_words)
        pause_counts[utter_id] = npause
        hesitancy_counts[utter_id] = nhesitant
        corpus.get_utterance(utter_id).meta[self.NAME1] = npause
        corpus.get_utterance(utter_id).meta[self.NAME2] = nhesitant

    for convo_id in corpus.get_conversation_ids():
        convo_utters = corpus.get_conversation(convo_id)._utterance_ids
        # BUG FIX: the counts were previously indexed positionally with
        # np.asarray(counts)[np.asarray(convo_utters)], which is only correct
        # when utterance ids happen to be the ints 0..n-1 in corpus order.
        # Look the counts up by utterance id instead.
        avgpause = np.mean([pause_counts[uid] for uid in convo_utters])
        avghesitancy = np.mean([hesitancy_counts[uid] for uid in convo_utters])
        corpus.get_conversation(convo_id)._meta[self.NAME3] = avgpause
        corpus.get_conversation(convo_id)._meta[self.NAME4] = avghesitancy
    return corpus
def transform(self, corpus: Corpus):
    """Annotates every user with four engagement features: bootstrapped
    politeness complexity, utterances-per-conversation ratio, first/last-word
    rate, and the user's raw share of all utterances.

    :param corpus: the corpus to compute features for; utterances must
        already carry "politeness_strategies" metadata (e.g. from a
        PolitenessStrategies transformer).
    :return: the same corpus, with the features written to each user's meta.
    """
    # hoisted loop invariant: total utterance count in the corpus
    total_utterances = len(list(corpus.utterances.values()))
    for character in corpus.get_usernames():
        user1 = corpus.get_user(character)
        utterances = user1.get_utterance_ids()
        utterances_per_conversation = []
        conversations = []
        # BUG FIX: first_last was re-initialized inside the loop, so only the
        # last utterance's first/last check survived; it must accumulate
        # across all of the user's utterances.
        first_last = 0
        for uid in utterances:
            utterance = corpus.get_utterance(uid)
            conversation = corpus.get_conversation(utterance.root)
            conversations.append(utterance.root)
            utterances_per_conversation.append(
                (utterance.root, len(conversation.get_usernames()),
                 len(conversation.get_utterance_ids())))
            # did this user open (root) or close (last utterance) the thread?
            if uid in (utterance.root,
                       list(conversation.get_utterance_ids())[-1]):
                first_last += 1
        raw_count = len(utterances) / total_utterances
        total_conversations = len(set(conversations))

        # bootstrapping: 20 resamples of 25 utterances each
        iterations = 0
        for _ in range(20):
            samples = random.choices(utterances, k=25)
            # politeness strategy vectors for the sampled utterances
            politeness_rows = [
                list(corpus.get_utterance(uid)
                     .meta["politeness_strategies"].values())
                for uid in samples
            ]
            politeness_results = np.sum(politeness_rows, 0)
            # fraction of strategies used at least once in the sample
            politeness_results_count = len([
                i / len(politeness_rows)
                for i in politeness_results if i != 0.0
            ]) / len(politeness_rows)
            iterations += politeness_results_count
        politeness_final = iterations / 20

        first_last_count = first_last / total_conversations

        # weight each conversation by this user's share of the average
        # utterances-per-user in that conversation
        utterances_per_conversations = Counter(utterances_per_conversation)
        upc_final = []
        for k, v in utterances_per_conversations.items():
            average = k[2] / k[1]
            upc_final.append(v / average)
        upc_count = sum(upc_final) / len(utterances_per_conversations)

        user1.add_meta('politeness_complexity', politeness_final)
        user1.add_meta('utterance_per_conversation', upc_count)
        user1.add_meta('first_last_word', first_last_count)
        user1.add_meta('raw_count', raw_count)
    return corpus
def transform(self, corpus: Corpus) -> Corpus:
    """Adds the ARI score to the metadata table of each utterance in the
    corpus.

    :return: corpus, modified with ARI and Flesch-Kincaid grade level
        scores assigned to each utterance
    """
    for utt_id in corpus.get_utterance_ids():
        utt = corpus.get_utterance(utt_id)
        # record both readability metrics on the utterance's metadata
        utt.meta['ARI'] = ARI(utt.text)
        utt.meta['Flesch-Kincaid'] = Flesch_Kincaid(utt.text)
    return corpus
def test_basic_functions(self):
    """Test basic meta functions"""
    corpus1 = Corpus(utterances=[
        Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
        Utterance(id="1", text="my name is bob", speaker=Speaker(id="bob")),
        Utterance(id="2", text="this is a test", speaker=Speaker(id="charlie")),
    ])
    first_utt = corpus1.get_utterance("0")
    first_utt.meta['hey'] = 9

    # the index stores the repr of the stored value's class
    self.assertEqual(corpus1.meta_index.utterances_index['hey'],
                     repr(type(9)))

    # a missing key raises KeyError
    with self.assertRaises(KeyError):
        first_utt.meta['nonexistent key']

    # .get() with a custom default still works
    self.assertEqual(first_utt.meta.get('nonexistent_key', {}), {})
def transform(self, corpus: Corpus) -> Corpus:
    """Computes the average number of questions asked in a conversation.

    :param corpus: the corpus to compute features for.
    :type corpus: Corpus
    :return: the same corpus, with the per-conversation average stored
        under ``self.ATTR_NAME`` in each conversation's metadata.
    """
    if self.verbose:
        print("Finding questions per utterance")
    # utterance id -> number of '?' runs in its text (a run like "??" counts once)
    question_counts = {}
    for utter_id in corpus.get_utterance_ids():
        text = corpus.get_utterance(utter_id).text
        question_counts[utter_id] = len(re.findall(r'\?+', text))

    if self.verbose:
        print("Finding questions per conversation")
    for convo_id in corpus.get_conversation_ids():
        convo_utters = corpus.get_conversation(convo_id)._utterance_ids
        # BUG FIX: counts were previously indexed positionally via
        # np.asarray(questions)[np.asarray(convo_utters)], which is only
        # correct when utterance ids are the ints 0..n-1 in corpus order.
        # Look them up by utterance id instead.
        avgquestion = np.mean([question_counts[uid] for uid in convo_utters])
        # adds average questions per conversation to conversation metadata
        corpus.get_conversation(convo_id)._meta[self.ATTR_NAME] = avgquestion
    return corpus
def get_scores(self, corpus: Corpus,
               selector: Optional[Callable[[], bool]] = None):
    """Calculates average occurrence per utterance. Used in summarize().

    :param corpus: the corpus used to compute averages
    :param selector: lambda function which takes in meta data and returns
        a boolean.
    :raises Exception: if the selector matches no utterances.
    :return: dict mapping marker category name to its average count per
        selected utterance.
    """
    utts = [corpus.get_utterance(x) for x in corpus.get_utterance_ids()]
    # lazily compute markers if transform() hasn't been run yet
    if self.MRKR_NAME not in utts[0].meta:
        corpus = self.transform(corpus, markers=True)
    if selector is not None:  # was `selector != None`; identity check is correct for None
        utts = [x for x in utts if selector(x.meta)]
        if len(utts) == 0:
            raise Exception("No query matches")
    # marker keys carry a fixed 21-char prefix and 2-char suffix around the
    # category name — presumably a serialized wrapper; TODO confirm format
    counts = {k[21:-2]: 0 for k in utts[0].meta[self.MRKR_NAME].keys()}
    for utt in utts:
        for k, v in utt.meta[self.MRKR_NAME].items():
            counts[k[21:-2]] += len(v)
    scores = {k: v / len(utts) for k, v in counts.items()}
    return scores
def transform(self, corpus: Corpus):
    """Annotates every conversation with 'reciprocity': the fraction of
    interacting user pairs who replied to each other in both directions."""
    for convo in corpus.iter_conversations():
        # map each user to the set of users they replied to in this convo
        user_to_targets = {
            user.name: {
                corpus.get_utterance(utt.reply_to).user.name
                for utt in user.iter_utterances()
                if utt.reply_to is not None
            }
            for user in convo.iter_users()
        }
        reciprocal = 0
        onesided = 0
        for user1, user2 in combinations(convo.iter_users(), 2):
            forward = user2.name in user_to_targets[user1.name]
            backward = user1.name in user_to_targets[user2.name]
            if forward and backward:
                reciprocal += 1
            elif forward or backward:
                onesided += 1
        interacting_pairs = reciprocal + onesided
        # avoid division by zero when no pair interacted at all
        reciprocity_pct = (reciprocal / interacting_pairs
                           if interacting_pairs else 0)
        convo.add_meta('reciprocity', reciprocity_pct)
    return corpus
def transform(self, corpus: Corpus):
    """Extract politeness strategies from each utterance in the corpus and
    annotate the utterances with the extracted strategies. Requires that
    the corpus has previously been transformed by a Parser, such that each
    utterance has dependency parse info in its metadata table.

    :param corpus: the corpus to compute features for.
    :type corpus: Corpus
    """
    # put the utterances into the format the bundled politeness API expects
    if self.verbose:
        print("Preprocessing comments...")
    comment_ids, processed_comments = self._preprocess_utterances(corpus)

    if self.verbose:
        print("Extracting politeness strategies...")

    # run the bundled politeness API over each preprocessed comment and
    # attach the extracted strategies to the matching utterance's metadata
    for utt_id, doc in zip(comment_ids, processed_comments):
        strats = get_politeness_strategy_features(doc)
        corpus.get_utterance(utt_id).meta[self.ATTR_NAME] = strats
    return corpus
def transform(self, corpus: Corpus) -> Corpus:
    """Runs the SpaCy parser on each utterance in the corpus, and adds the
    parses to the utterance metadata table.

    :return: corpus, modified with parses assigned to each utterance
    """
    utt_ids = corpus.get_utterance_ids()
    texts = (corpus.get_utterance(utt_id).text for utt_id in utt_ids)

    # with a single thread parse sequentially; otherwise let spacy.pipe
    # parallelize the parsing across threads
    if self.n_threads == 1:
        spacy_iter = (self.spacy_nlp(text) for text in texts)
    else:
        spacy_iter = self.spacy_nlp.pipe(texts, n_threads=self.n_threads)

    # attach each parse (with tensors stripped) to its utterance's metadata
    for utt_id, parsed in zip(utt_ids, spacy_iter):
        corpus.get_utterance(utt_id).meta[self.ATTR_NAME] = _remove_tensor(parsed)
    return corpus
def fit_transform(self, corpus: Corpus) -> Corpus:
    """Groups threads together into communities.

    :param corpus: the Corpus to use
    :return: Modifies and returns Corpus with new meta key:
        "communityEmbedder", value: Dict, containing "pts": an array with
        rows corresponding to embedded communities, and "labels": an array
        whose ith entry is the community of the ith row of X.
    """
    if self.community_key is None:
        raise RuntimeError(
            "Must specify community_key to retrieve label information from utterance"
        )

    corpus_meta = corpus.get_meta()
    if "threadEmbedder" not in corpus_meta:
        raise RuntimeError(
            "Missing threadEmbedder metadata: "
            "threadEmbedder.fit_transform() must be run on the Corpus first"
        )
    thread_embed_data = corpus_meta["threadEmbedder"]
    X_mid = thread_embed_data["X"]
    roots = thread_embed_data["roots"]

    # pick the dimensionality-reduction backend ("none" keeps X unchanged)
    embedders = {"svd": TruncatedSVD, "tsne": TSNE, "none": None}
    method = self.method.lower()
    if method not in embedders:
        raise Exception("Invalid embed_communities embedding method")
    reducer = embedders[method]
    if reducer is not None:
        X_embedded = reducer(n_components=self.n_components).fit_transform(X_mid)
    else:
        X_embedded = X_mid

    labels = [
        corpus.get_utterance(root).get("meta")[self.community_key]
        for root in roots
    ]

    # bucket the L2-normalized thread embeddings by community label, then
    # represent each community by the mean of its members
    subs = defaultdict(list)
    for x, label in zip(X_embedded, labels):
        subs[label].append(x / np.linalg.norm(x))
    labels, subs = zip(*subs.items())
    pts = [np.mean(sub, axis=0) for sub in subs]

    corpus.add_meta("communityEmbedder", {"pts": pts, "labels": labels})
    return corpus
def test_multiple_types(self):
    """The utterance index tracks each distinct value type seen for a key;
    None values create no entry."""
    corpus1 = Corpus(utterances=[
        Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
        Utterance(id="1", text="my name is bob", speaker=Speaker(id="bob")),
        Utterance(id="2", text="this is a test", speaker=Speaker(id="charlie")),
    ])
    # a None value does not register the key in the index
    corpus1.get_utterance('2').meta['hey'] = None
    self.assertIsNone(corpus1.meta_index.utterances_index.get('hey', None))
    # the first real value registers its type
    corpus1.get_utterance('0').meta['hey'] = 5
    self.assertEqual(corpus1.meta_index.utterances_index['hey'],
                     [str(type(5))])
    # a second, different type is appended to the key's type list
    corpus1.get_utterance('1').meta['hey'] = 'five'
    self.assertEqual(corpus1.meta_index.utterances_index['hey'],
                     [str(type(5)), str(type('five'))])
def test_corpus_merge_add(self):
    """Adding separately-initialized utterances with new metadata updates
    both the utterance and user indices."""
    corpus1 = Corpus(utterances=[
        Utterance(id="0", text="hello world", user=User(id="alice")),
        Utterance(id="1", text="my name is bob", user=User(id="bob")),
        Utterance(id="2", text="this is a test", user=User(id="charlie")),
    ])
    for utt_id, key, value in [("0", 'foo', 'bar'),
                               ("1", 'foo', 'bar2'),
                               ("2", 'hey', 'jude')]:
        corpus1.get_utterance(utt_id).meta[key] = value

    # a freshly constructed utterance carries new utterance- and user-level
    # metadata keys that the merged corpus must index
    new_utt = Utterance(id="4", text="hello world",
                        user=User(id="alice", meta={'donkey': 'kong'}),
                        meta={'new': 'meta'})
    new_corpus = corpus1.add_utterances([new_utt])
    self.assertIn('new', new_corpus.meta_index.utterances_index)
    self.assertIn('donkey', new_corpus.meta_index.users_index)
def test_key_insertion_deletion(self):
    """Per-object deletion leaves the index intact; delete_metadata purges
    a key from the index and every object of that type."""
    corpus1 = Corpus(utterances=[
        Utterance(id="0", text="hello world", speaker=Speaker(id="alice")),
        Utterance(id="1", text="my name is bob", speaker=Speaker(id="bob")),
        Utterance(id="2", text="this is a test", speaker=Speaker(id="charlie")),
    ])
    corpus1.get_utterance("0").meta['foo'] = 'bar'
    corpus1.get_utterance("1").meta['foo'] = 'bar2'
    corpus1.get_utterance("2").meta['hey'] = 'jude'
    corpus1.get_conversation(None).meta['convo_meta'] = 1
    corpus1.get_speaker("alice").meta['surname'] = 1.0

    self.assertEqual(corpus1.meta_index.utterances_index['foo'],
                     [str(type('bar'))])
    self.assertEqual(corpus1.meta_index.conversations_index['convo_meta'],
                     [str(type(1))])
    self.assertEqual(corpus1.meta_index.speakers_index['surname'],
                     [str(type(1.0))])

    # deleting an attribute from an individual utterance fails to remove it
    del corpus1.get_utterance("2").meta['hey']
    corpus1.get_utterance("2").meta['hey']

    # delete_metadata removes the key from the index and from every utterance
    corpus1.delete_metadata('utterance', 'foo')
    with self.assertRaises(KeyError):
        corpus1.meta_index.utterances_index['foo']
    with self.assertRaises(KeyError):
        corpus1.get_utterance("0").meta["foo"]
def transform(self, corpus: Corpus):
    '''
    creates and populates user, convo aggregates.

    :param corpus: the Corpus to transform.
    :type corpus: Corpus
    '''
    for user in corpus.iter_users():
        # users without a per-conversation breakdown are skipped entirely
        if 'conversations' not in user.meta:
            continue
        for convo_id, convo in user.meta['conversations'].items():
            # collect this user's per-utterance attribute values in the convo
            attr_values = []
            for utt_id in convo['utterance_ids']:
                attr_values.append(
                    corpus.get_utterance(utt_id).meta[self.attr_name])
            # store the aggregate back on the user's conversation entry
            user.meta['conversations'][convo_id][self.attr_name] = \
                self.agg_fn(attr_values)
    return corpus
def transform(self, corpus: Corpus):
    '''
    creates and populates user, convo aggregates.

    :param corpus: the Corpus to transform.
    :type corpus: Corpus
    '''
    for user in corpus.iter_users():
        # users without a per-conversation breakdown are skipped entirely
        if 'conversations' not in user.meta:
            continue
        for convo_id, convo in user.meta['conversations'].items():
            # skip convos that already carry the aggregate, unless the
            # caller asked for a recompute (short-circuit preserved: the
            # existing value is only looked up when recompute is False)
            if not self.recompute and corpus.get_user_convo_info(
                    user.name, convo_id, self.output_field) is not None:
                continue
            attr_values = [
                corpus.get_utterance(utt_id).meta[self.attr_name]
                for utt_id in convo['utterance_ids']
            ]
            corpus.set_user_convo_info(user.name, convo_id,
                                       self.output_field,
                                       self.agg_fn(attr_values))
    return corpus
def score(self, corpus: Corpus,
          speakers: Collection[Union[User, str]],
          group: Collection[Union[User, str]],
          focus: str = "speakers",
          speaker_thresh: int = 0,
          target_thresh: int = 3,
          utterances_thresh: int = 0,
          speaker_thresh_indiv: int = 0,
          target_thresh_indiv: int = 0,
          utterances_thresh_indiv: int = 0,
          utterance_thresh_func: Optional[Callable[
              [Tuple[Utterance, Utterance]], bool]] = None,
          split_by_attribs: Optional[List[str]] = None,
          speaker_attribs: Optional[Dict] = None,
          target_attribs: Optional[Dict] = None) -> CoordinationScore:
    """Computes the coordination scores for each speaker, given a set of
    speakers and a group of targets.

    :param corpus: Corpus to compute scores on
    :param speakers: A collection of user ids or user objects corresponding
        to the speakers we want to compute scores for.
    :param group: A collection of user ids or user objects corresponding to
        the group of targets.
    :param focus: Either "speakers" or "targets". If "speakers", treat the
        set of targets for a particular speaker as a single person (i.e.
        concatenate all of their utterances); the returned dictionary will
        have speakers as keys. If "targets", treat the set of speakers for
        a particular target as a single person; the returned dictionary
        will have targets as keys.
    :param speaker_thresh: Thresholds based on minimum number of times the
        speaker uses each coordination marker.
    :param target_thresh: Thresholds based on minimum number of times the
        target uses each coordination marker.
    :param utterances_thresh: Thresholds based on the minimum number of
        utterances for each speaker.
    :param speaker_thresh_indiv: Like `speaker_thresh` but only considers
        the utterances between a speaker and a single target; thresholds
        whether the utterances for a single target should be considered for
        a particular speaker.
    :param target_thresh_indiv: Like `target_thresh` but thresholds whether
        a single target's utterances should be considered for a particular
        speaker.
    :param utterances_thresh_indiv: Like `utterances_thresh` but thresholds
        whether a single target's utterances should be considered for a
        particular speaker.
    :param utterance_thresh_func: Optional utterance-level threshold
        function that takes in a speaker `Utterance` and the `Utterance`
        the speaker replied to, and returns a `bool` corresponding to
        whether or not to include the utterance in scoring.
    :param split_by_attribs: Utterance meta attributes to split users by
        when tallying coordination (e.g. in supreme court transcripts, you
        may want to treat the same lawyer as a different person across
        different cases --- see coordination examples)
    :param speaker_attribs: attribute names and values the speaker must have
    :param target_attribs: attribute names and values the target must have

    :return: A :class:`CoordinationScore` object corresponding to the
        coordination scores for each speaker.
    """
    # scoring is only valid against the corpus this transformer was fit on
    if corpus != self.corpus:
        raise Exception("Coordination: must fit and score on same corpus")
    if not self.precomputed:
        raise Exception("Must fit before calling score")

    # normalize optional mutable arguments
    if split_by_attribs is None:
        split_by_attribs = []
    if speaker_attribs is None:
        speaker_attribs = dict()
    if target_attribs is None:
        target_attribs = dict()
    #self.precompute()
    speakers = set(speakers)
    group = set(group)

    # keep only reply utterances where the replier is one of the requested
    # speakers and the person being replied to is in the target group
    utterances = []
    for utt in corpus.iter_utterances():
        speaker = utt.user
        if speaker in speakers:
            if utt.reply_to is not None:
                reply_to = corpus.get_utterance(utt.reply_to)
                target = reply_to.user
                if target in group:
                    utterances.append(utt)
    # the heavy lifting (tallying + thresholding) happens in
    # scores_over_utterances
    return self.scores_over_utterances(
        corpus, speakers, utterances, speaker_thresh, target_thresh,
        utterances_thresh, speaker_thresh_indiv, target_thresh_indiv,
        utterances_thresh_indiv, utterance_thresh_func, focus,
        split_by_attribs, speaker_attribs, target_attribs)
def scores_over_utterances(
        self, corpus: Corpus, speakers: Collection[Union[User, str]],
        utterances, speaker_thresh: int, target_thresh: int,
        utterances_thresh: int, speaker_thresh_indiv: int,
        target_thresh_indiv: int, utterances_thresh_indiv: int,
        utterance_thresh_func: Optional[Callable[
            [Tuple[Utterance, Utterance]], bool]] = None,
        focus: str = "speakers",
        split_by_attribs: Optional[List[str]] = None,
        speaker_attribs: Optional[Dict] = None,
        target_attribs: Optional[Dict] = None) -> CoordinationScore:
    """Tally marker usage over (reply, replied-to) utterance pairs and
    convert the tallies into per-speaker coordination scores.

    For each reply pair and each LIWC marker category:
    ``tally`` counts replies where the speaker used the marker,
    ``cond_total`` counts replies where the target used it, and
    ``cond_tally`` counts replies where both did. The score for a category
    is P(speaker uses marker | target used it) - P(speaker uses marker),
    computed after the various thresholds are applied.

    :param utterances: reply utterances pre-filtered by ``score()``.
    :return: a :class:`CoordinationScore` mapping each qualifying speaker
        (or target, when ``focus == "targets"``) to a dict of per-category
        coordination values.
    """
    assert not isinstance(speakers, str)
    assert focus == "speakers" or focus == "targets"

    # normalize optional mutable arguments
    if split_by_attribs is None:
        split_by_attribs = []
    if speaker_attribs is None:
        speaker_attribs = {}
    if target_attribs is None:
        target_attribs = {}

    # nested tallies: speaker -> category -> target -> count
    tally = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    cond_tally = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    cond_total = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

    n_utterances = defaultdict(lambda: defaultdict(int))
    targets = defaultdict(set)
    real_speakers = set()
    for utt2 in utterances:
        if corpus.has_utterance(utt2.reply_to):
            speaker = utt2.user
            utt1 = corpus.get_utterance(utt2.reply_to)
            target = utt1.user
            # self-replies are never scored
            if speaker == target:
                continue
            # annotate users with the split_by_attribs values so the same
            # person can be treated as distinct across attribute values
            speaker, target = Coordination._annot_user(speaker, utt2, split_by_attribs), \
                Coordination._annot_user(target, utt1, split_by_attribs)
            speaker_has_attribs = Coordination._utterance_has_attribs(
                utt2, speaker_attribs)
            target_has_attribs = Coordination._utterance_has_attribs(
                utt1, target_attribs)
            if not speaker_has_attribs or not target_has_attribs:
                continue
            real_speakers.add(speaker)

            if utterance_thresh_func is None or \
                    utterance_thresh_func(utt2, utt1):
                # with focus == "targets" the roles are swapped so the
                # output is keyed by target instead of speaker
                if focus == "targets":
                    speaker, target = target, speaker
                targets[speaker].add(target)
                n_utterances[speaker][target] += 1
                for cat in utt1.meta["liwc-categories"].union(
                        utt2.meta["liwc-categories"]):
                    if cat in utt2.meta["liwc-categories"]:
                        tally[speaker][cat][target] += 1
                    if cat in utt1.meta["liwc-categories"]:
                        cond_total[speaker][cat][target] += 1
                        # both speaker and target used the marker
                        if cat in utt2.meta["liwc-categories"]:
                            cond_tally[speaker][cat][target] += 1

    out = CoordinationScore()
    if focus == "targets":
        # thresholds follow the roles when they are swapped
        speaker_thresh, target_thresh = target_thresh, speaker_thresh
        speaker_thresh_indiv, target_thresh_indiv = target_thresh_indiv, speaker_thresh_indiv
        real_speakers = list(targets.keys())

    for speaker in real_speakers:
        if speaker[0] not in speakers and focus != "targets":
            continue
        coord_w = {}  # coordination score wrt a category
        for cat in CoordinationWordCategories:
            threshed_cond_total = 0
            threshed_cond_tally = 0
            threshed_tally = 0
            threshed_n_utterances = 0
            # per-target thresholds decide which targets contribute to this
            # speaker's aggregate counts
            for target in targets[speaker]:
                if tally[speaker][cat][target] >= speaker_thresh_indiv and \
                        cond_total[speaker][cat][target] >= target_thresh_indiv and \
                        n_utterances[speaker][target] >= utterances_thresh_indiv:
                    threshed_cond_total += cond_total[speaker][cat][target]
                    threshed_cond_tally += cond_tally[speaker][cat][target]
                    threshed_tally += tally[speaker][cat][target]
                    threshed_n_utterances += n_utterances[speaker][target]
            # aggregate-level thresholds; max(..., 1) also guards the
            # divisions below against zero denominators
            if threshed_cond_total >= max(target_thresh, 1) and \
                    threshed_tally >= speaker_thresh and \
                    threshed_n_utterances >= max(utterances_thresh, 1):
                coord_w[cat] = threshed_cond_tally / threshed_cond_total - \
                    threshed_tally / threshed_n_utterances
        if len(coord_w) > 0:
            # keys stay annotated only when split_by_attribs was requested
            out[speaker if split_by_attribs else speaker[0]] = coord_w
    return out