Exemple #1
0
    def pick(self, values: Sequence[str]) -> Optional[str]:
        """Choose the most plausible user-facing name out of ``values``.

        The selection runs in two stages so that spellings which differ only
        in form (e.g. ``Smith`` vs. ``SMITH``) are not treated as wildly
        different names, which would skew the median:

        1. every value is normalised with ``slugify`` and the median of the
           normalised forms is taken;
        2. among the original values that share that normalised form, the
           median is taken again.

        Returns ``None`` when ``values`` is empty or no median normalised form
        can be determined.
        """
        # Work on a sorted copy so that ties resolve the same way every time.
        candidates = sorted(values)
        if not candidates:
            return None

        slugs = []
        by_slug: Dict[str, List[str]] = {}
        for candidate in candidates:
            slug = slugify(candidate, sep=" ")
            if slug is not None:
                slugs.append(slug)
                by_slug.setdefault(slug, []).append(candidate)

        central = setmedian(slugs)
        if central is None:
            return None

        # Stage two: arbitrate between the raw spellings behind the winner.
        variants = by_slug.get(central, [])
        if len(variants) > 1:
            return cast(str, setmedian(variants))
        return first(variants)
 def pick(self, values):
     """Return one representative entry from ``values``.

     ``values`` is passed through ``ensure_list`` and each item through
     ``sanitize_text``; items that sanitise to ``None`` are dropped.
     Returns ``None`` if nothing remains, the sole survivor if exactly one
     remains, and otherwise whatever ``setmedian`` selects from them.
     """
     cleaned = []
     for raw in ensure_list(values):
         text = sanitize_text(raw)
         if text is not None:
             cleaned.append(text)

     if len(cleaned) > 1:
         return setmedian(cleaned)
     return cleaned[0] if cleaned else None
Exemple #3
0
    def entities(self):
        """Yield a ``(label, type_)`` pair for each sufficiently tagged entity.

        Iterates ``self.tags``, whose keys are ``(key, type_)`` tuples and
        whose values are the tags collected for that key. An entry is skipped
        when it has no tags at all or fewer tags than
        ``self.type_cutoff(type_)``.

        The label is the first tag, except for ``registry.name`` entries with
        more than one distinct tag, where ``setmedian`` picks the label.
        """
        for (_key, type_), tags in self.tags.items():
            # Skip entities that do not meet a threshold of relevance. The
            # emptiness check keeps ``tags[0]`` below from raising when the
            # cutoff is zero or negative.
            cutoff = self.type_cutoff(type_)
            if not tags or len(tags) < cutoff:
                continue

            label = tags[0]
            # With a single distinct spelling there is nothing to arbitrate,
            # so only consult setmedian when the spellings actually differ.
            if type_ == registry.name and len(set(tags)) > 1:
                label = setmedian(tags)
            yield label, type_
Exemple #4
0
 def pick(self, values):
     """Return one representative entry from ``values``.

     ``values`` is passed through ``ensure_list`` and each item through
     ``sanitize_text``; items that sanitise to ``None`` are dropped.
     With zero or one remaining item the result is ``first`` of the
     remainder; otherwise it is ``setmedian`` applied to the remainder in
     sorted order.
     """
     sanitized = map(sanitize_text, ensure_list(values))
     cleaned = [text for text in sanitized if text is not None]
     if len(cleaned) > 1:
         ordered = sorted(cleaned)
         return setmedian(ordered)
     return first(cleaned)
Exemple #5
0
def select_label(labels):
    """Pick a single label from ``labels`` by delegating to ``setmedian``."""
    chosen = setmedian(labels)
    return chosen
Exemple #6
0
 def label(self):
     """Return the label to use for this object.

     In strict mode, or when there is at most one label, the first entry of
     ``self.labels`` is returned. Otherwise ``setmedian`` chooses among all
     of ``self.labels``.
     """
     # Guard clause: nothing to arbitrate, or arbitration is disabled.
     if self.strict or len(self.labels) <= 1:
         return self.labels[0]
     return setmedian(self.labels)