Esempio n. 1
0
    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Parse and normalize an email address.

        The mailbox part is kept as-is. The domain part is reduced to its
        host name when it parses as a URL, lower-cased, stripped of trailing
        dots and IDNA-encoded to ASCII.

        Args:
            text: The raw value to clean.
            fuzzy: Accepted for interface compatibility; not used here.
            format: Accepted for interface compatibility; not used here.
            proxy: Accepted for interface compatibility; not used here.

        Returns:
            The normalized address, or None if this is not an email address
            (no regex match, a domain that cannot be parsed or IDNA-encoded,
            or an empty mailbox/domain after normalization).
        """
        email = strip_quotes(text)
        if email is None or not self.REGEX.match(email):
            return None
        mailbox, domain = email.rsplit("@", 1)
        # TODO: https://pypi.python.org/pypi/publicsuffix/
        # handle URLs by extracting the domain name
        try:
            domain = urlparse(domain).hostname or domain
        except ValueError:
            # urlparse rejects malformed netlocs (e.g. unmatched brackets);
            # treat those as "not an email address" instead of crashing.
            return None
        domain = domain.lower().rstrip(".")
        # handle unicode
        try:
            domain = domain.encode("idna").decode("ascii")
        except UnicodeError:
            # The idna codec raises for empty or over-long labels and for
            # characters that are not permitted in a host name.
            return None
        # Both parts are always strings here, so test for emptiness rather
        # than for None to avoid emitting a half-empty address.
        if not mailbox or not domain:
            return None
        return "@".join((mailbox, domain))
Esempio n. 2
0
 def clean_text(
     self,
     text: str,
     fuzzy: bool = False,
     format: Optional[str] = None,
     proxy: Optional["EntityProxy"] = None,
 ) -> Optional[str]:
     """Apply the basic clean-up steps to a text value.

     The value is passed through ``strip_quotes`` and that result is then
     passed through ``collapse_spaces``. The ``fuzzy``, ``format`` and
     ``proxy`` arguments are accepted but not used here.
     """
     return collapse_spaces(strip_quotes(text))
Esempio n. 3
0
    def clean_text(self, email, **kwargs):
        """Parse and normalize an email address.

        The mailbox part is kept unchanged; the domain part is cleaned by
        ``self.domains.clean``, which also receives ``**kwargs``.

        Returns None if this is not an email address, i.e. when the value
        does not match ``EMAIL_REGEX`` or the domain cannot be cleaned.
        """
        email = strip_quotes(email)
        # NOTE(review): strip_quotes presumably returns None for unusable
        # input; guard so the regex match cannot raise TypeError.
        if email is None or not self.EMAIL_REGEX.match(email):
            return None
        mailbox, domain = email.rsplit('@', 1)
        domain = self.domains.clean(domain, **kwargs)
        if domain is None or mailbox is None:
            return None
        return '@'.join((mailbox, domain))
Esempio n. 4
0
 def clean_text(self, name, **kwargs):
     """Apply the basic clean-up steps to a name.

     Runs ``strip_quotes`` on the value and then ``collapse_spaces`` on the
     result. Extra keyword arguments are accepted but ignored.
     """
     unquoted = strip_quotes(name)
     return collapse_spaces(unquoted)