def process(self, message: Message, **kwargs: Any) -> None:
    """Snap string-valued entities onto their best gazette match.

    Every entity on ``message`` that has a configuration (as returned by
    ``self._find_entity``) and a ``str`` value is compared against the
    gazette entries stored under the entity's ``"entity"`` key, using
    ``process.extract`` with the configured ``"mode"`` as scorer and
    ``self.limit`` as the candidate cap.

    All returned candidates are recorded under
    ``entity["gazette_matches"]``. The entity's ``"value"`` is overwritten
    with the top candidate only when that candidate's score is strictly
    greater than the configured ``"min_score"``. Entities without a
    configuration, or whose value is not a string, pass through untouched.

    Entities are modified in place; the collected list is written back to
    the message under ``"entities"``.

    Args:
        message: Object whose ``"entities"`` list is read via ``get`` and
            replaced via ``set``.
        **kwargs: Accepted but not used by this method.
    """
    resolved = []
    for entity in message.get("entities", []):
        config = self._find_entity(entity, self.entities)

        if config is not None and isinstance(entity["value"], str):
            candidates = process.extract(
                entity["value"],
                self.gazette.get(entity["entity"], []),
                limit=self.limit,
                scorer=config["mode"],
            )

            # Only the highest-ranked candidate may replace the value, and
            # only if it clears the configured threshold.
            if candidates:
                best_value, best_score = candidates[0]
                if best_value is not None and best_score > config["min_score"]:
                    entity["value"] = best_value

            # The full candidate list is attached regardless of whether the
            # value itself was replaced.
            entity["gazette_matches"] = [
                {"value": text, "score": rating} for text, rating in candidates
            ]

        resolved.append(entity)

    message.set("entities", resolved)
def process(self, message, **kwargs):
    # type: (Message, **Any) -> None
    """Snap string-valued entities onto their best gazette match.

    ``self._load_config()`` is invoked first. Then, for every entity on
    ``message`` that has a configuration (as returned by
    ``_find_entity_config`` against ``self.component_config``) and a
    ``str`` value, the value is compared against the gazette entries
    stored under the entity's ``"entity"`` key using ``process.extract``.
    The scorer is the configuration's ``"mode"``; the candidate cap is the
    ``"max_num_suggestions"`` entry of ``self.component_config``.

    All returned candidates are recorded under
    ``entity["gazette_matches"]``. The entity's ``"value"`` is overwritten
    with the top candidate only when that candidate's score is strictly
    greater than the configured ``"min_score"``. Entities without a
    configuration, or whose value is not a string, pass through untouched.

    Entities are modified in place; the collected list is written back to
    the message under ``"entities"``.
    """
    self._load_config()

    entities = message.get("entities", [])
    max_suggestions = self.component_config.get("max_num_suggestions")

    resolved = []
    for entity in entities:
        config = _find_entity_config(entity, self.component_config)

        if config is not None and isinstance(entity["value"], str):
            candidates = process.extract(
                entity["value"],
                self.gazette.get(entity["entity"], []),
                limit=max_suggestions,
                scorer=config["mode"],
            )

            # Only the highest-ranked candidate may replace the value, and
            # only if it clears the configured threshold.
            if candidates:
                best_value, best_score = candidates[0]
                if best_value is not None and best_score > config["min_score"]:
                    entity["value"] = best_value

            # The full candidate list is attached regardless of whether the
            # value itself was replaced.
            entity["gazette_matches"] = [
                {"value": text, "score": rating} for text, rating in candidates
            ]

        resolved.append(entity)

    message.set("entities", resolved)
def test_ratio():
    """Check the top three ``ratio``-scored matches for ``"orange"``."""
    candidates = [
        "blue",
        "orange",
        "brown",
        "ornage",
        "range",
        "angel",
        "gang",
        "ang",
    ]
    expected = [("orange", 100), ("range", 83), ("ornage", 66)]

    top_three = process.extract("orange", candidates, limit=3, scorer="ratio")

    assert top_three == expected
def test_partial_ratio():
    """Check ``partial_ratio`` scoring of ``"orange"`` against phrases."""
    candidates = ["blue tango", "orange tango", "brown tango"]
    expected = [("orange tango", 100), ("blue tango", 50), ("brown tango", 50)]

    ranked = process.extract(
        "orange", candidates, limit=3, scorer="partial_ratio"
    )

    assert ranked == expected
def _find_matches(query, gazette, mode="ratio", limit=5): output = {} for key, val in gazette.items(): output[key] = process.extract(query, val, limit=limit, scorer=mode) return output