Esempio n. 1
0
 def fuzzily_get_email_full_name(self, email: str) -> FullName:
     email = email.lower()
     email_prefix = email.split(sep='@', maxsplit=1)[0]
     cleaned = re.sub(r'(?:^[\d]+)|(?:[+.\-_\d]+$)', '', email_prefix)
     lemmatizer = nltk.stem.WordNetLemmatizer()
     lemma = lemmatizer.lemmatize(cleaned)
     if lemma in self.words_set:
         return FullName('', '')
     for func in self.emails_full_names_functions:
         full_name = func(cleaned)
         if full_name is not None and not full_name.is_empty():
             return full_name
     return FullName('', '')
Esempio n. 2
0
 def get_email_first_name_near_perfect_match(self, prefix: str) -> FullName:
     first_name = ''
     last_name = ''
     candidate = '' if len(prefix) < 4 else prefix[:-1]
     if candidate in all_first_names_set:
         first_name = candidate
     return FullName(first_name, last_name)
Esempio n. 3
0
 def get_email_full_name_perfect_match(self, prefix: str) -> FullName:
     first_name = ''
     last_name = ''
     for i in range(len(prefix) - 3, 2, -1):
         if prefix[:i] in all_first_names_set and prefix[
                 i:] in all_last_names_set:
             first_name = prefix[:i]
             last_name = prefix[i:]
             break
     return FullName(first_name, last_name)
Esempio n. 4
0
 def get_email_full_name_using_sep(self, prefix: str) -> FullName:
     first_name = ''
     last_name = ''
     split = re.split(r'[+.\-_\d]+', prefix)
     if len(split) > 1:
         first_name_candidate = re.sub(r'(?:^[\d]+)|(?:[\d]+$)', '',
                                       split[0])
         if first_name_candidate in all_first_names_set:
             first_name = first_name_candidate
         last_name_candidate = re.sub(r'(?:^[\d]+)|(?:[\d]+$)', '',
                                      split[-1])
         if last_name_candidate in all_last_names_set:
             last_name = last_name_candidate
         full_name = FullName(first_name, last_name)
         if not full_name.is_full():
             full_name_reversed = self.get_email_full_name_using_sep_reversed(
                 prefix)
             if full_name_reversed.is_full():
                 return full_name_reversed
             else:
                 return full_name
     return FullName(first_name, last_name)
Esempio n. 5
0
 def get_email_full_name_using_sep_reversed(self, prefix: str) -> FullName:
     first_name = ''
     last_name = ''
     split = re.split(r'[+.\-_\d]+', prefix)
     if len(split) > 1:
         first_name_candidate = re.sub(r'(?:^[\d]+)|(?:[\d]+$)', '',
                                       split[1])
         if first_name_candidate in all_first_names_set:
             first_name = first_name_candidate
         last_name_candidate = re.sub(r'(?:^[\d]+)|(?:[\d]+$)', '',
                                      split[0])
         if last_name_candidate in all_last_names_set:
             last_name = last_name_candidate
     return FullName(first_name, last_name)
Esempio n. 6
0
 def get_email_last_name_perfect_match(self, prefix: str) -> FullName:
     first_name = ''
     last_name = ''
     if prefix in all_last_names_set:
         last_name = prefix
     return FullName(first_name, last_name)