def nlp(self, text):
    """Split *text* into recipients, a time and a body.

    Tokens produced by ``self.nlp_model`` are grouped by their ``dep_``
    label: ``TO`` (recipient candidates), ``START`` (time words) and
    ``BODY`` (body words). Tokens with any other label are only logged.

    Returns:
        A ``(recipients, time, body)`` tuple where

        * ``recipients`` is what ``ner.cross_check_names`` returns for the
          ``TO`` tokens and the persons reported by ``ner.get_persons``
          using the shared ``xx_ent_wiki_sm`` model,
        * ``time`` is ``tc.parse_time`` applied to the ``START`` tokens, or
          five seconds from now when there are none,
        * ``body`` is the ``BODY`` tokens joined with single spaces.
    """
    doc = self.nlp_model(text)

    shared_ner = self.model_pool.get_shared_model("xx_ent_wiki_sm")
    with shared_ner:
        persons = ner.get_persons(shared_ner.acquire_model(), text)

    recipients, time_words, body_words = [], [], []
    # Map each label of interest to the list that collects its tokens.
    buckets = {"TO": recipients, "START": time_words, "BODY": body_words}
    for token in doc:
        bucket = buckets.get(token.dep_)
        if bucket is not None:
            bucket.append(token.text)
        log.debug("%s %s", token.text, token.dep_)

    if time_words:
        moment = tc.parse_time(time_words)
    else:
        # No time words in the text: fall back to "five seconds from now".
        moment = datetime.now() + timedelta(seconds=5)

    recipients = ner.cross_check_names(recipients, persons)
    log.debug("Recipients: %s", ",".join(recipients))

    return (recipients, moment, " ".join(body_words))
def nlp(self, text):
    """Split *text* into recipients, a timespan and a body.

    Tokens produced by ``self.nlp_model`` are grouped by their ``dep_``
    label: ``TO``, ``START``, ``END`` and ``BODY``. Tokens with any other
    label are only logged.

    Returns:
        A ``(recipients, timespan, body)`` tuple where

        * ``recipients`` is what ``ner.cross_check_names`` returns for the
          ``TO`` tokens and the persons reported by ``ner.get_persons``
          using the shared ``xx_ent_wiki_sm`` model,
        * ``timespan`` is ``self.timespan`` applied to the ``START`` and
          ``END`` token lists,
        * ``body`` is the ``BODY`` tokens joined with single spaces.
    """
    doc = self.nlp_model(text)

    shared_ner = self.model_pool.get_shared_model("xx_ent_wiki_sm")
    with shared_ner:
        persons = ner.get_persons(shared_ner.acquire_model(), text)

    recipients, start_words, end_words, body_words = [], [], [], []
    # Map each label of interest to the list that collects its tokens.
    buckets = {
        "TO": recipients,
        "START": start_words,
        "END": end_words,
        "BODY": body_words,
    }
    for token in doc:
        bucket = buckets.get(token.dep_)
        if bucket is not None:
            bucket.append(token.text)
        log.debug("%s %s", token.text, token.dep_)

    recipients = ner.cross_check_names(recipients, persons)
    log.debug("Recipients: %s", ",".join(recipients))

    span = self.timespan(start_words, end_words)
    return (recipients, span, " ".join(body_words))
def test_same_input_same_output(self):
    """
    Cross checking a list of names against itself should hand back
    that very same list of names.
    """
    names = ["mark", "evert", "reggie"]
    # The same list object is deliberately used for both arguments.
    assert ner.cross_check_names(names, names) == names
def test_full_name_returned(self):
    """
    When the tags are the separate parts of a full name that is present
    in persons, cross checking should return the full name.
    """
    full_names = ["mark anderson"]
    result = ner.cross_check_names(["mark", "anderson"], full_names)
    assert full_names == result
def test_missing_tag(self):
    """
    A tag that has no counterpart in persons should be appended after
    the persons in the cross-checked result.
    """
    found_tags = ["mark", "anderson", "evert"]
    known_persons = ["mark anderson"]
    result = ner.cross_check_names(found_tags, known_persons)
    assert result == ["mark anderson", "evert"]
def nlp(self, text):
    """Split *text* into recipients, start/end times and a body.

    Tokens produced by ``self.nlp_model`` are grouped by their ``dep_``
    label: ``TO``, ``START``, ``END`` and ``BODY``. Tokens with any other
    label are only logged.

    Returns:
        A ``(recipients, times, body)`` tuple where

        * ``recipients`` is what ``ner.cross_check_names`` returns for the
          ``TO`` tokens and the persons reported by ``ner.get_persons``
          using the shared ``xx_ent_wiki_sm`` model,
        * ``times`` is a dict with the keys ``"start"`` and ``"end"``.
          ``"start"`` is ``tc.parse_time`` of the ``START`` tokens, or five
          seconds from now when there are none. ``"end"`` is
          ``tc.parse_time`` of the ``END`` tokens, or the start time plus
          ``self.get_meeting_duration()`` minutes when there are none,
        * ``body`` is the ``BODY`` tokens joined with single spaces.
    """
    doc = self.nlp_model(text)

    shared_ner = self.model_pool.get_shared_model("xx_ent_wiki_sm")
    with shared_ner:
        persons = ner.get_persons(shared_ner.acquire_model(), text)

    recipients, start_words, end_words, body_words = [], [], [], []
    # Map each label of interest to the list that collects its tokens.
    buckets = {
        "TO": recipients,
        "START": start_words,
        "END": end_words,
        "BODY": body_words,
    }
    for token in doc:
        bucket = buckets.get(token.dep_)
        if bucket is not None:
            bucket.append(token.text)
        log.debug("%s %s", token.text, token.dep_)

    recipients = ner.cross_check_names(recipients, persons)
    log.debug("Recipients: %s", ",".join(recipients))

    if start_words:
        begins = tc.parse_time(start_words)
    else:
        # No start words in the text: fall back to "five seconds from now".
        begins = datetime.now() + timedelta(seconds=5)

    if end_words:
        ends = tc.parse_time(end_words)
    else:
        # No end words in the text: use the configured meeting duration.
        ends = begins + timedelta(minutes=self.get_meeting_duration())

    return (recipients, {"start": begins, "end": ends}, " ".join(body_words))
def nlp(self, text):
    """Split *text* into recipients, a start time and a body dict.

    Tokens produced by ``self.nlp_model`` are grouped by their ``dep_``
    label: ``TO``, ``START``, ``BODY`` and ``NSTART``. Tokens with any
    other label are only logged.

    Returns:
        A ``(recipients, start_time, body)`` tuple where

        * ``recipients`` is what ``ner.cross_check_names`` returns for the
          ``TO`` tokens and the persons reported by ``ner.get_persons``
          using the shared ``xx_ent_wiki_sm`` model,
        * ``start_time`` is ``tc.parse_time`` of the ``START`` tokens, or
          five seconds from now when there are none,
        * ``body`` is a dict with ``"summary"`` (the ``BODY`` tokens joined
          with single spaces) and ``"new start"`` (``tc.parse_time`` of the
          ``NSTART`` tokens, or five seconds from now when there are none).
    """
    doc = self.nlp_model(text)

    shared_ner = self.model_pool.get_shared_model("xx_ent_wiki_sm")
    with shared_ner:
        persons = ner.get_persons(shared_ner.acquire_model(), text)

    recipients, start_words, body_words, new_start_words = [], [], [], []
    # Map each label of interest to the list that collects its tokens.
    buckets = {
        "TO": recipients,
        "START": start_words,
        "BODY": body_words,
        "NSTART": new_start_words,
    }
    for token in doc:
        bucket = buckets.get(token.dep_)
        if bucket is not None:
            bucket.append(token.text)
        log.debug("%s %s", token.text, token.dep_)

    recipients = ner.cross_check_names(recipients, persons)
    log.debug("Recipients: %s", ",".join(recipients))

    if start_words:
        current_start = tc.parse_time(start_words)
    else:
        # No start words in the text: fall back to "five seconds from now".
        current_start = datetime.now() + timedelta(seconds=5)

    if new_start_words:
        moved_start = tc.parse_time(new_start_words)
    else:
        # Same fallback for the new start time.
        moved_start = datetime.now() + timedelta(seconds=5)

    details = {"summary": " ".join(body_words), "new start": moved_start}
    return (recipients, current_start, details)