def address_spec(self, stream):
    '''
    Parse a single addr-spec out of `stream`.

    The parsed value is only returned when, after skipping optional
    trailing whitespace, the whole stream has been consumed; in every
    other case the result is None.

    Raises ParserException when `stream` is None, when it is a `str`
    that is not pure ASCII, or when it is longer than MAX_ADDRESS_LENGTH.
    '''
    # reject missing input up front (an empty value is NOT rejected here)
    if stream is None:
        raise ParserException('No input provided to parser.')

    # a plain str has to be pure ASCII
    if isinstance(stream, str) and not is_pure_ascii(stream):
        raise ParserException('ASCII string contains non-ASCII chars.')

    # cap the input size so the parser can not spin on huge input
    if len(stream) > MAX_ADDRESS_LENGTH:
        message = ('Stream length exceeds maximum allowable '
                   'address length of ') + str(MAX_ADDRESS_LENGTH) + '.'
        raise ParserException(message)

    self.stream = TokenStream(stream)

    parsed = self._addr_spec()
    if not parsed:
        return None

    # trailing whitespace is allowed before the end of the stream
    self._whitespace()

    # leftover input means this was not a lone addr-spec
    if not self.stream.end_of_stream():
        return None
    return parsed
def _validate_primary(localpart):
    '''
    Check a primary local part given as a text string.

    Grammar: local-part -> alpha { [ dot | underscore ] ( alpha | num ) }

    Returns False when the length is outside 4..32, when more than one
    dot is present, when the first token is not alpha, or when the token
    loop stops before the end of the stream; True otherwise.
    '''
    # overall length has to fall within 4..32
    if not 4 <= len(localpart) <= 32:
        return False

    # at most a single dot (.) is tolerated
    if localpart.count('.') > 1:
        return False

    tokens = TokenStream(localpart)

    # the local part must begin with alpha
    if tokens.get_token(ALPHA) is None:
        return False

    while True:
        # the separator is optional, so its value is deliberately ignored
        if not tokens.get_token(DOT):
            tokens.get_token(UNDERSCORE)

        # stop once no further alpha or numeric run can be read
        chunk = tokens.get_token(ALPHA) or tokens.get_token(NUMERIC)
        if chunk is None:
            break

    # unread input means the grammar did not cover the whole local part
    if tokens.end_of_stream():
        return True
    return False
def _validate_primary(localpart):
    '''
    Check a primary local part given as a byte string (the dot count is
    taken with a bytes literal).

    Grammar: local-part -> alpha { [ dot | underscore ] ( alpha | num ) }

    Returns False when the length is outside 4..32, when more than one
    dot is present, when the first token is not alpha, or when the token
    loop stops before the end of the stream; True otherwise.
    '''
    # overall length has to fall within 4..32
    size = len(localpart)
    if not 4 <= size <= 32:
        return False

    # at most a single dot (.) is tolerated
    dots = localpart.count(b'.')
    if dots > 1:
        return False

    tokens = TokenStream(localpart)

    # the local part must begin with alpha
    if tokens.get_token(ALPHA) is None:
        return False

    while True:
        # the separator is optional, so its value is deliberately ignored
        if not tokens.get_token(DOT):
            tokens.get_token(UNDERSCORE)

        # stop once no further alpha or numeric run can be read
        chunk = tokens.get_token(ALPHA) or tokens.get_token(NUMERIC)
        if chunk is None:
            break

    # unread input means the grammar did not cover the whole local part
    if tokens.end_of_stream():
        return True
    return False
def _validate(localpart):
    '''
    Check a local part made of a GMAIL_BASE token followed by optional
    tags.

    Returns True only when the base token is present and the stream is
    exhausted after _tags() has run; False otherwise.
    '''
    tokens = TokenStream(localpart)

    # the gmail base (alpha, num, or dot) is mandatory
    if tokens.get_token(GMAIL_BASE) is None:
        return False

    # tags are optional; only their effect on the stream matters here
    _tags(tokens)

    # nothing may remain after the base and the tags
    if tokens.end_of_stream():
        return True
    return False
def _validate(localpart):
    '''
    Check a local part made of a GOOGLE_BASE token followed by optional
    tags.

    Returns True only when the base token is present and the stream is
    exhausted after _tags() has run; False otherwise.
    '''
    tokens = TokenStream(localpart)

    # the google base is mandatory
    base = tokens.get_token(GOOGLE_BASE)
    if base is None:
        return False

    # tags are optional; only their effect on the stream matters here
    _tags(tokens)

    # nothing may remain after the base and the tags
    return True if tokens.end_of_stream() else False
def _validate(localpart):
    '''
    Check a local part made of a HOTMAIL_BASE token followed by optional
    tags.

    Returns True only when the base token is present and the stream is
    exhausted after _tags() has run; False otherwise.
    '''
    tokens = TokenStream(localpart)

    # the hotmail base is mandatory
    if tokens.get_token(HOTMAIL_BASE) is None:
        return False

    # tags are optional; only their effect on the stream matters here
    _tags(tokens)

    # leftover input after the tags invalidates the local part
    fully_consumed = tokens.end_of_stream()
    if not fully_consumed:
        return False
    return True
def _validate_disposable(email_addr):
    '''
    Check a disposable local part (base, one hyphen, keyword) taken from
    an EmailAddress-like object.

    Grammar: local-part ->
        alpha { [ alpha | num | underscore ] } hyphen { [ alpha | num ] }

    `email_addr.mailbox` supplies the local part, and
    managed_email(email_addr.hostname) decides whether a leading alpha
    is enforced. Returns True or False.
    '''
    # work from the EmailAddress object rather than a literal string
    localpart = email_addr.mailbox
    managed = managed_email(email_addr.hostname)

    # total length covers base + hyphen + keyword
    if not 3 <= len(localpart) <= 65:
        return False

    # exactly one hyphen separates base and keyword
    if localpart.count('-') != 1:
        return False

    # base and keyword are each limited to 1..32 characters
    if any(len(piece) < 1 or len(piece) > 32
           for piece in localpart.split('-')):
        return False

    tokens = TokenStream(localpart)

    # a missing leading alpha is only fatal for managed hosts
    head = tokens.get_token(ALPHA)
    if head is None and managed:
        return False

    # swallow the rest of the base: alpha, num, underscore
    run = tokens.get_token(ALPHANUM) or tokens.get_token(UNDERSCORE)
    while run is not None:
        run = tokens.get_token(ALPHANUM) or tokens.get_token(UNDERSCORE)

    # the hyphen is mandatory
    if tokens.get_token(HYPHEN) is None:
        return False

    # keyword must be alpha, num
    tokens.get_token(ALPHANUM)

    # anything left unread invalidates the local part
    if tokens.end_of_stream():
        return True
    return False
def _validate(localpart):
    '''
    Check a local part made of alphanumeric runs joined by single dots,
    followed by optional tags.

    Returns False when an alphanumeric run is missing (at the start or
    after a dot) or when input remains after _tags() has run; True
    otherwise.
    '''
    tokens = TokenStream(localpart)

    expect_run = True
    while expect_run:
        # every iteration needs an alphanumeric portion
        if tokens.get_token(ALPHANUM) is None:
            return False

        # a dot is optional, but it obliges another alphanumeric run
        expect_run = tokens.get_token(DOT) is not None

    # tags are optional; only their effect on the stream matters here
    _tags(tokens)

    # nothing may remain after the tags
    return True if tokens.end_of_stream() else False
def address_list(self, stream):
    '''
    Parse a list of mailboxes and/or urls out of `stream`.

    When self.strict is exactly True the result of
    _address_list_strict() is returned, otherwise the result of
    _address_list_relaxed().

    Raises ParserException when `stream` is falsy, when it is a `str`
    that is not pure ASCII, or when it is longer than
    MAX_ADDRESS_LIST_LENGTH.
    '''
    # reject missing or empty input up front
    if not stream:
        raise ParserException('No input provided to parser.')

    # a plain str has to be pure ASCII
    if isinstance(stream, str) and not is_pure_ascii(stream):
        raise ParserException('ASCII string contains non-ASCII chars.')

    # cap the input size so the parser can not spin on huge input
    if len(stream) > MAX_ADDRESS_LIST_LENGTH:
        message = ('Stream length exceeds maximum allowable '
                   'address list length of ') + \
            str(MAX_ADDRESS_LIST_LENGTH) + '.'
        raise ParserException(message)

    # attach the input before handing over to the grammar
    self.stream = TokenStream(stream)

    if self.strict is True:
        parse = self._address_list_strict
    else:
        parse = self._address_list_relaxed
    return parse()
def _validate_disposable(localpart):
    '''
    Check a disposable local part (base, one hyphen, keyword) given as a
    text string.

    Grammar: local-part ->
        alpha { [ alpha | num | underscore ] } hyphen { [ alpha | num ] }

    Returns True or False.
    '''
    # total length covers base + hyphen + keyword
    if not 3 <= len(localpart) <= 65:
        return False

    # exactly one hyphen separates base and keyword
    if localpart.count('-') != 1:
        return False

    # base and keyword are each limited to 1..32 characters
    if any(len(piece) < 1 or len(piece) > 32
           for piece in localpart.split('-')):
        return False

    tokens = TokenStream(localpart)

    # the local part must begin with alpha
    if tokens.get_token(ALPHA) is None:
        return False

    # swallow the rest of the base: alpha, num, underscore
    run = tokens.get_token(ALPHANUM) or tokens.get_token(UNDERSCORE)
    while run is not None:
        run = tokens.get_token(ALPHANUM) or tokens.get_token(UNDERSCORE)

    # the hyphen is mandatory
    if tokens.get_token(HYPHEN) is None:
        return False

    # keyword must be alpha, num
    tokens.get_token(ALPHANUM)

    # anything left unread invalidates the local part
    if tokens.end_of_stream():
        return True
    return False
def _validate_disposable(localpart):
    '''
    Check a disposable local part (base, one hyphen, keyword) given as a
    byte string (hyphens are counted and split with bytes literals).

    Grammar: local-part ->
        alpha { [ alpha | num | underscore ] } hyphen { [ alpha | num ] }

    Returns True or False.
    '''
    # total length covers base + hyphen + keyword
    size = len(localpart)
    if not 3 <= size <= 65:
        return False

    # exactly one hyphen separates base and keyword
    if localpart.count(b'-') != 1:
        return False

    # base and keyword are each limited to 1..32 bytes
    for piece in localpart.split(b'-'):
        if not 1 <= len(piece) <= 32:
            return False

    tokens = TokenStream(localpart)

    # the local part must begin with alpha
    if tokens.get_token(ALPHA) is None:
        return False

    # swallow the rest of the base: alpha, num, underscore
    run = tokens.get_token(ALPHANUM) or tokens.get_token(UNDERSCORE)
    while run is not None:
        run = tokens.get_token(ALPHANUM) or tokens.get_token(UNDERSCORE)

    # the hyphen is mandatory
    if tokens.get_token(HYPHEN) is None:
        return False

    # keyword must be alpha, num
    tokens.get_token(ALPHANUM)

    # anything left unread invalidates the local part
    if tokens.end_of_stream():
        return True
    return False
def _validate(localpart):
    '''
    Grammar: local-part -> alpha { [ dot | underscore ] ( alpha | num ) }

    Returns False when the first token is not alpha or when the token
    loop stops before the end of the stream; True otherwise.
    '''
    tokens = TokenStream(localpart)

    # the local part must begin with alpha
    if tokens.get_token(ALPHA) is None:
        return False

    while True:
        # the separator is optional, so its value is deliberately ignored
        if not tokens.get_token(DOT):
            tokens.get_token(UNDERSCORE)

        # stop once no further alpha or numeric run can be read
        chunk = tokens.get_token(ALPHA) or tokens.get_token(NUMERIC)
        if chunk is None:
            break

    # unread input means the grammar did not cover the whole local part
    if tokens.end_of_stream():
        return True
    return False
def _validate(localpart):
    '''
    Check a local part that starts with an ICLOUD_PREFIX token and
    continues with ICLOUD_BASE tokens, each optionally preceded by a dot
    or an underscore.

    Returns False when the prefix is missing or when the token loop
    stops before the end of the stream; True otherwise.
    '''
    tokens = TokenStream(localpart)

    # the prefix token is mandatory
    if tokens.get_token(ICLOUD_PREFIX) is None:
        return False

    while True:
        # the separator is optional, so its value is deliberately ignored
        if not tokens.get_token(DOT):
            tokens.get_token(UNDERSCORE)

        # stop once no further base token can be read
        if tokens.get_token(ICLOUD_BASE) is None:
            break

    # unread input means the local part was not fully matched
    return True if tokens.end_of_stream() else False
def _validate_primary(email_addr):
    '''
    Check a primary local part taken from an EmailAddress-like object.

    Grammar: local-part -> alpha { [ dot | underscore ] ( alpha | num ) }

    `email_addr.mailbox` supplies the local part, and
    managed_email(email_addr.hostname) decides whether a leading alpha
    is enforced. Returns True or False.
    '''
    # work from the EmailAddress object rather than a literal string
    localpart = email_addr.mailbox
    managed = managed_email(email_addr.hostname)

    # overall length has to fall within 4..32
    if not 4 <= len(localpart) <= 32:
        return False

    # at most a single dot (.) is tolerated
    if localpart.count('.') > 1:
        return False

    tokens = TokenStream(localpart)

    # a missing leading alpha is only fatal for managed hosts
    head = tokens.get_token(ALPHA)
    if head is None and managed:
        return False

    while True:
        # the separator is optional, so its value is deliberately ignored
        if not tokens.get_token(DOT):
            tokens.get_token(UNDERSCORE)

        # stop once no further alpha or numeric run can be read
        chunk = tokens.get_token(ALPHA) or tokens.get_token(NUMERIC)
        if chunk is None:
            break

    # unread input means the grammar did not cover the whole local part
    if tokens.end_of_stream():
        return True
    return False
class _AddressParser(object):
    '''
    Recursive descent parser for email addresses, urls and lists of them.

    Do not use _AddressParser directly because it heavily relies on other
    private classes and methods and its interface is not guaranteed; it
    will change in the future and possibly break your application.

    Instead use the parse() and parse_list() functions in the address.py
    module which will always return a scalar or iterable respectively.
    '''

    def __init__(self, strict=False):
        '''
        Create a parser with no input attached yet.

        `strict` is only consulted by address_list(): the strict list
        grammar is used when it is exactly True, the relaxed one otherwise.
        '''
        self.stream = None
        self.strict = strict

    def address_list(self, stream):
        '''
        Extract a mailbox and/or url list from a stream of input,
        operates in strict and relaxed modes.

        Returns the result of _address_list_strict() in strict mode and
        the (addresses, unparsable) pair of _address_list_relaxed()
        otherwise.

        Raises ParserException when `stream` is falsy, is a `str` that is
        not pure ASCII, or is longer than MAX_ADDRESS_LIST_LENGTH.
        '''
        # sanity check
        if not stream:
            raise ParserException('No input provided to parser.')
        if isinstance(stream, str) and not is_pure_ascii(stream):
            raise ParserException('ASCII string contains non-ASCII chars.')

        # to avoid spinning here forever, limit address list length
        if len(stream) > MAX_ADDRESS_LIST_LENGTH:
            raise ParserException(
                'Stream length exceeds maximum allowable ' +
                'address list length of ' +
                str(MAX_ADDRESS_LIST_LENGTH) + '.')

        # set stream
        self.stream = TokenStream(stream)

        if self.strict is True:
            return self._address_list_strict()
        return self._address_list_relaxed()

    def address(self, stream):
        '''
        Extract a single address or url from a stream of input, always
        operates in strict mode.

        Returns the parsed address when it spans the whole input (trailing
        whitespace allowed) and None otherwise.

        Raises ParserException when `stream` is falsy, is a `str` that is
        not pure ASCII, or is longer than MAX_ADDRESS_LENGTH.
        '''
        # sanity check
        if not stream:
            raise ParserException('No input provided to parser.')
        if isinstance(stream, str) and not is_pure_ascii(stream):
            raise ParserException('ASCII string contains non-ASCII chars.')

        # to avoid spinning here forever, limit mailbox length
        if len(stream) > MAX_ADDRESS_LENGTH:
            raise ParserException(
                'Stream length exceeds maximum allowable ' +
                'address length of ' + str(MAX_ADDRESS_LENGTH) + '.')

        self.stream = TokenStream(stream)

        addr = self._address()
        if addr:
            # optional whitespace
            self._whitespace()

            # if we hit the end of the stream, we have a valid inbox
            if self.stream.end_of_stream():
                return addr

        return None

    def address_spec(self, stream):
        '''
        Extract a single address spec from a stream of input, always
        operates in strict mode.

        Returns the parsed addr-spec when it spans the whole input
        (trailing whitespace allowed) and None otherwise.

        Raises ParserException when `stream` is None, is a `str` that is
        not pure ASCII, or is longer than MAX_ADDRESS_LENGTH. Unlike
        address() and address_list(), only None is rejected here; an
        empty value goes on to the parser.
        '''
        # sanity check
        if stream is None:
            raise ParserException('No input provided to parser.')
        if isinstance(stream, str) and not is_pure_ascii(stream):
            raise ParserException('ASCII string contains non-ASCII chars.')

        # to avoid spinning here forever, limit mailbox length
        if len(stream) > MAX_ADDRESS_LENGTH:
            raise ParserException(
                'Stream length exceeds maximum allowable ' +
                'address length of ' + str(MAX_ADDRESS_LENGTH) + '.')

        self.stream = TokenStream(stream)

        addr = self._addr_spec()
        if addr:
            # optional whitespace
            self._whitespace()

            # if we hit the end of the stream, we have a valid inbox
            if self.stream.end_of_stream():
                return addr

        return None

    def _mailbox_post_processing_checks(self, address):
        '''
        Additional post processing checks to ensure mailbox is valid.

        `address` is the "local-part@domain" string. Returns False when a
        size limit is exceeded or a domain label matches BAD_DOMAIN, and
        True otherwise.
        '''
        # split on the LAST @ only: _local_part() accepts quoted strings,
        # so the local part may itself contain an @, while the domain
        # (a dot-atom) can not
        lpart, domn = address.rsplit('@', 1)

        # check if local part is no more than 128 octets, the actual
        # limit is 64 octets but we double the size here because
        # unsubscribe links are frequently longer
        if len(lpart) > 128:
            return False

        # check that the domain is no more than 253 octets
        if len(domn) > 253:
            return False

        # number of labels can not be over 127
        labels = domn.split('.')
        if len(labels) > 127:
            return False

        for label in labels:
            # check the domain doesn't start or end with - and
            # the length of each label is no more than 63 octets
            if BAD_DOMAIN.search(label) or len(label) > 63:
                return False

        return True

    def _address_list_relaxed(self):
        '''
        Grammar: address-list-relaxed -> address { delimiter address }

        Returns a pair (addresses, unparsable). Text that can not be
        parsed is skipped up to the next synchronization point and
        collected in `unparsable` instead of aborting the parse. The
        early exits below return a plain empty list rather than an
        AddressList as the first element.
        '''
        addrs = flanker.addresslib.address.AddressList()
        unparsable = []

        # address
        addr = self._address()
        if addr is None:
            # synchronize to the next delimiter (or end of line)
            # append the skipped over text to the unparsable list
            skip = self.stream.synchronize()
            if skip:
                unparsable.append(skip)

            # if no mailbox and end of stream, we were unable to parse
            # anything, return the unparsable stream
            if self.stream.end_of_stream():
                return [], unparsable
        else:
            # if we found a delimiter or end of stream, we have a
            # valid mailbox, add it
            if self.stream.peek(DELIMITER) or self.stream.end_of_stream():
                addrs.append(addr)
            else:
                # otherwise synchronize and add it to the unparsable
                # array, together with everything that preceded the
                # skipped text
                skip = self.stream.synchronize()
                if skip:
                    pre = self.stream.stream[:self.stream.stream.index(skip)]
                    unparsable.append(pre + skip)
                # if we hit the end of the stream, the whole input was
                # unparsable
                if self.stream.end_of_stream():
                    return [], [self.stream.stream]

        while True:
            # delimiter
            dlm = self.stream.get_token(DELIMITER)
            if dlm is None:
                skip = self.stream.synchronize()
                if skip:
                    unparsable.append(skip)
                if self.stream.end_of_stream():
                    break

            # address; remember where it started so that a rejected
            # address can be reported in full
            start_pos = self.stream.position
            addr = self._address()
            if addr is None:
                skip = self.stream.synchronize()
                if skip:
                    unparsable.append(skip)

                if self.stream.end_of_stream():
                    break
            else:
                # if we found a delimiter or end of stream, we have a
                # valid mailbox, add it
                if self.stream.peek(DELIMITER) or \
                        self.stream.end_of_stream():
                    addrs.append(addr)
                else:
                    # otherwise synchronize and add it to the
                    # unparsable array
                    skip = self.stream.synchronize()
                    if skip:
                        sskip = self.stream.stream[
                            start_pos:self.stream.position]
                        unparsable.append(sskip)
                    # if we hit the end of the stream, return the results
                    if self.stream.end_of_stream():
                        return addrs, unparsable

        return addrs, unparsable

    def _address_list_strict(self):
        '''
        Grammar: address-list-strict -> address { delimiter address }

        Returns an AddressList; parsing stops at the first element that
        does not fit the grammar.
        '''
        addrs = flanker.addresslib.address.AddressList()

        # address
        addr = self._address()
        if addr is None:
            return addrs
        # NOTE(review): the first address is only kept when a delimiter
        # follows it, so a lone address yields an empty list - confirm
        # this is intended
        if self.stream.peek(DELIMITER):
            addrs.append(addr)

        while True:
            # delimiter
            dlm = self.stream.get_token(DELIMITER)
            if dlm is None:
                break

            # address
            addr = self._address()
            if addr is None:
                break
            addrs.append(addr)

        return addrs

    def _address(self):
        '''
        Grammar: address -> name-addr-rfc | name-addr-lax | addr-spec | url

        Returns the first alternative that parses, or None. The stream
        position is restored when an email address fails the post
        processing checks.
        '''
        start_pos = self.stream.position

        addr = self._name_addr_rfc() or self._name_addr_lax() or \
            self._addr_spec() or self._url()

        # if email address, check that it passes post processing checks
        if addr and isinstance(addr, flanker.addresslib.address.EmailAddress):
            if self._mailbox_post_processing_checks(addr.address) is False:
                # roll back
                self.stream.position = start_pos
                return None

        return addr

    def _url(self):
        '''
        Grammar: url -> url

        Returns a UrlAddress built from the URL token, or None.
        '''
        earl = self.stream.get_token(URL)
        if earl is None:
            return None
        return flanker.addresslib.address.UrlAddress(to_utf8(earl))

    def _name_addr_rfc(self):
        '''
        Grammar: name-addr-rfc -> [ display-name-rfc ] angle-addr-rfc

        Returns an EmailAddress, or None with the stream position
        restored.
        '''
        start_pos = self.stream.position

        # optional displayname
        dname = self._display_name_rfc()

        aaddr = self._angle_addr_rfc()
        if aaddr is None:
            # roll back
            self.stream.position = start_pos
            return None

        if dname:
            return flanker.addresslib.address.EmailAddress(dname, aaddr)
        return flanker.addresslib.address.EmailAddress(None, aaddr)

    def _display_name_rfc(self):
        '''
        Grammar: display-name-rfc -> [ whitespace ] word { whitespace word }

        Returns the cleaned up display name, or None when no word could be
        read. The position is not restored here on failure; the caller
        (_name_addr_rfc) rolls back when the following angle-addr fails.
        '''
        wrds = []

        # optional whitespace
        self._whitespace()

        # word
        wrd = self._word()
        if wrd is None:
            return None
        wrds.append(wrd)

        while True:
            # whitespace
            wtsp = self._whitespace()
            if wtsp is None:
                break
            wrds.append(wtsp)

            # word
            wrd = self._word()
            if wrd is None:
                break
            wrds.append(wrd)

        return cleanup_display_name(''.join(wrds))

    def _angle_addr_rfc(self):
        '''
        Grammar: angle-addr-rfc ->
            [ whitespace ] < addr-spec > [ whitespace ]

        Returns the addr-spec as a string, or None with the stream
        position restored.
        '''
        start_pos = self.stream.position

        # optional whitespace
        self._whitespace()

        # left angle bracket
        if self.stream.get_token(LBRACKET) is None:
            # rollback
            self.stream.position = start_pos
            return None

        # addr-spec
        aspec = self._addr_spec(True)
        if aspec is None:
            # rollback
            self.stream.position = start_pos
            return None

        # right angle bracket
        if self.stream.get_token(RBRACKET) is None:
            # rollback
            self.stream.position = start_pos
            return None

        # optional whitespace
        self._whitespace()

        return aspec

    def _name_addr_lax(self):
        '''
        Grammar: name-addr-lax -> [ display-name-lax ] angle-addr-lax

        Returns an EmailAddress, or None with the stream position
        restored.
        '''
        start_pos = self.stream.position

        # optional displayname
        dname = self._display_name_lax()

        aaddr = self._angle_addr_lax()
        if aaddr is None:
            # roll back
            self.stream.position = start_pos
            return None

        if dname:
            return flanker.addresslib.address.EmailAddress(dname, aaddr)
        return flanker.addresslib.address.EmailAddress(None, aaddr)

    def _display_name_lax(self):
        '''
        Grammar: display-name-lax ->
            [ whitespace ] word { whitespace word } whitespace

        Returns the cleaned up display name, or None with the stream
        position restored. A word that is not followed by whitespace is
        handed back to the stream, since it is presumably the start of
        the address that follows.
        '''
        start_pos = self.stream.position
        wrds = []

        # optional whitespace
        self._whitespace()

        # word
        wrd = self._word()
        if wrd is None:
            # roll back
            self.stream.position = start_pos
            return None
        wrds.append(wrd)

        # peek to see if we have a whitespace,
        # if we don't, we have an invalid display-name
        # NOTE(review): both patterns have to match here; presumably
        # UNI_WHITE also matches ASCII whitespace - confirm
        if self.stream.peek(WHITESPACE) is None or \
                self.stream.peek(UNI_WHITE) is None:
            self.stream.position = start_pos
            return None

        while True:
            # whitespace
            wtsp = self._whitespace()
            if wtsp:
                wrds.append(wtsp)

            # if we need to roll back the next word
            start_pos = self.stream.position

            # word
            wrd = self._word()
            if wrd is None:
                self.stream.position = start_pos
                break
            wrds.append(wrd)

            # peek to see if we have a whitespace
            # if we don't, pop off the last word and break
            if self.stream.peek(WHITESPACE) is None or \
                    self.stream.peek(UNI_WHITE) is None:
                # roll back last word
                self.stream.position = start_pos
                wrds.pop()
                break

        return cleanup_display_name(''.join(wrds))

    def _angle_addr_lax(self):
        '''
        Grammar: angle-addr-lax -> addr-spec [ whitespace ]

        Returns the addr-spec as a string, or None with the stream
        position restored.
        '''
        start_pos = self.stream.position

        # addr-spec
        aspec = self._addr_spec(True)
        if aspec is None:
            # rollback
            self.stream.position = start_pos
            return None

        # optional whitespace
        self._whitespace()

        return aspec

    def _addr_spec(self, as_string=False):
        '''
        Grammar: addr-spec ->
            [ whitespace ] local-part @ domain [ whitespace ]

        Returns the cleaned up address as a string when `as_string` is
        true and wrapped in an EmailAddress otherwise; None with the
        stream position restored when the grammar does not match.
        '''
        start_pos = self.stream.position

        # optional whitespace
        self._whitespace()

        lpart = self._local_part()
        if lpart is None:
            # rollback
            self.stream.position = start_pos
            return None

        asym = self.stream.get_token(AT_SYMBOL)
        if asym is None:
            # rollback
            self.stream.position = start_pos
            return None

        domn = self._domain()
        if domn is None:
            # rollback
            self.stream.position = start_pos
            return None

        # optional whitespace
        self._whitespace()

        aspec = cleanup_email(''.join([lpart, asym, domn]))
        if as_string:
            return aspec
        return flanker.addresslib.address.EmailAddress(None, aspec)

    def _local_part(self):
        "Grammar: local-part -> dot-atom | quoted-string"
        return self.stream.get_token(DOT_ATOM) or \
            self.stream.get_token(QSTRING)

    def _domain(self):
        "Grammar: domain -> dot-atom"
        return self.stream.get_token(DOT_ATOM)

    def _word(self):
        '''
        Grammar: word -> word-ascii | word-unicode

        An ASCII word is only accepted when no unicode atom follows it
        directly; otherwise the position is rewound and the word is read
        as unicode.
        '''
        start_pos = self.stream.position

        # ascii word
        ascii_wrd = self._word_ascii()
        if ascii_wrd and not self.stream.peek(UNI_ATOM):
            return ascii_wrd

        # didn't get an ascii word, rollback to try again
        self.stream.position = start_pos

        # unicode word
        return self._word_unicode()

    def _word_ascii(self):
        '''
        Grammar: word-ascii -> atom | qstring

        Returns the word, or None when nothing matched or the word holds
        control characters (the position is not restored in that case).
        '''
        wrd = self.stream.get_token(RELAX_ATOM) or \
            self.stream.get_token(QSTRING)
        if wrd and not contains_control_chars(wrd):
            return wrd

        return None

    def _word_unicode(self):
        '''
        Grammar: word-unicode -> unicode-atom | unicode-qstring

        Returns the unicode atom as is, or the quoted string re-encoded
        and wrapped in double quotes; None with the stream position
        restored when neither matches.
        '''
        start_pos = self.stream.position

        # unicode atom
        uwrd = self.stream.get_token(UNI_ATOM)
        if uwrd and isinstance(uwrd, unicode) and \
                not contains_control_chars(uwrd):
            return uwrd

        # unicode qstr
        uwrd = self.stream.get_token(UNI_QSTR, 'qstr')
        if uwrd and isinstance(uwrd, unicode) and \
                not contains_control_chars(uwrd):
            return u'"{}"'.format(encode_string(None, uwrd))

        # rollback
        self.stream.position = start_pos
        return None

    def _whitespace(self):
        "Grammar: whitespace -> whitespace-ascii | whitespace-unicode"
        return self._whitespace_ascii() or self._whitespace_unicode()

    def _whitespace_ascii(self):
        "Grammar: whitespace-ascii -> whitespace-ascii"
        return self.stream.get_token(WHITESPACE)

    def _whitespace_unicode(self):
        '''
        Grammar: whitespace-unicode -> whitespace-unicode

        Only whitespace that is not pure ASCII is returned; anything else
        yields None.
        '''
        uwhite = self.stream.get_token(UNI_WHITE)
        if uwhite and not is_pure_ascii(uwhite):
            return uwhite
        return None
class _AddressParser(object):
    '''
    Recursive descent parser for email addresses, urls and lists of them.

    Do not use _AddressParser directly because it heavily relies on other
    private classes and methods and its interface is not guaranteed; it
    will change in the future and possibly break your application.

    Instead use the parse() and parse_list() functions in the address.py
    module which will always return a scalar or iterable respectively.
    '''

    def __init__(self, strict=False):
        '''
        Create a parser with no input attached yet.

        `strict` is only consulted by address_list(): the strict list
        grammar is used when it is exactly True, the relaxed one otherwise.
        '''
        self.stream = None
        self.strict = strict

    def address_list(self, stream):
        '''
        Extract a mailbox and/or url list from a stream of input,
        operates in strict and relaxed modes.

        Returns the result of _address_list_strict() in strict mode and
        the (addresses, unparsable) pair of _address_list_relaxed()
        otherwise.

        Raises ParserException when `stream` is falsy, is a `str` that is
        not pure ASCII, or is longer than MAX_ADDRESS_LIST_LENGTH.
        '''
        # sanity check
        if not stream:
            raise ParserException('No input provided to parser.')
        if isinstance(stream, str) and not is_pure_ascii(stream):
            raise ParserException('ASCII string contains non-ASCII chars.')

        # to avoid spinning here forever, limit address list length
        if len(stream) > MAX_ADDRESS_LIST_LENGTH:
            raise ParserException(
                'Stream length exceeds maximum allowable ' +
                'address list length of ' +
                str(MAX_ADDRESS_LIST_LENGTH) + '.')

        # set stream
        self.stream = TokenStream(stream)

        if self.strict is True:
            return self._address_list_strict()
        return self._address_list_relaxed()

    def address(self, stream):
        '''
        Extract a single address or url from a stream of input, always
        operates in strict mode.

        Returns the parsed address when it spans the whole input (trailing
        whitespace allowed) and None otherwise.

        Raises ParserException when `stream` is falsy, is a `str` that is
        not pure ASCII, or is longer than MAX_ADDRESS_LENGTH.
        '''
        # sanity check
        if not stream:
            raise ParserException('No input provided to parser.')
        if isinstance(stream, str) and not is_pure_ascii(stream):
            raise ParserException('ASCII string contains non-ASCII chars.')

        # to avoid spinning here forever, limit mailbox length
        if len(stream) > MAX_ADDRESS_LENGTH:
            raise ParserException(
                'Stream length exceeds maximum allowable ' +
                'address length of ' + str(MAX_ADDRESS_LENGTH) + '.')

        self.stream = TokenStream(stream)

        addr = self._address()
        if addr:
            # optional whitespace
            self._whitespace()

            # if we hit the end of the stream, we have a valid inbox
            if self.stream.end_of_stream():
                return addr

        return None

    def address_spec(self, stream):
        '''
        Extract a single address spec from a stream of input, always
        operates in strict mode.

        Returns the parsed addr-spec when it spans the whole input
        (trailing whitespace allowed) and None otherwise.

        Raises ParserException when `stream` is None, is a `str` that is
        not pure ASCII, or is longer than MAX_ADDRESS_LENGTH. Unlike
        address() and address_list(), only None is rejected here; an
        empty value goes on to the parser.
        '''
        # sanity check
        if stream is None:
            raise ParserException('No input provided to parser.')
        if isinstance(stream, str) and not is_pure_ascii(stream):
            raise ParserException('ASCII string contains non-ASCII chars.')

        # to avoid spinning here forever, limit mailbox length
        if len(stream) > MAX_ADDRESS_LENGTH:
            raise ParserException(
                'Stream length exceeds maximum allowable ' +
                'address length of ' + str(MAX_ADDRESS_LENGTH) + '.')

        self.stream = TokenStream(stream)

        addr = self._addr_spec()
        if addr:
            # optional whitespace
            self._whitespace()

            # if we hit the end of the stream, we have a valid inbox
            if self.stream.end_of_stream():
                return addr

        return None

    def _mailbox_post_processing_checks(self, address):
        '''
        Additional post processing checks to ensure mailbox is valid.

        `address` is the "local-part@domain" string. Returns False when a
        size limit is exceeded or a domain label matches BAD_DOMAIN, and
        True otherwise.
        '''
        # split on the LAST @ only: _local_part() accepts quoted strings,
        # so the local part may itself contain an @, while the domain
        # (a dot-atom) can not
        lpart, domn = address.rsplit('@', 1)

        # check if local part is no more than 256 octets, the actual
        # limit is 64 octets but we quadruple the size here because
        # unsubscribe links are frequently longer
        if len(lpart) > 256:
            return False

        # check that the domain is no more than 253 octets
        if len(domn) > 253:
            return False

        # number of labels can not be over 127
        labels = domn.split('.')
        if len(labels) > 127:
            return False

        for label in labels:
            # check the domain doesn't start or end with - and
            # the length of each label is no more than 63 octets
            if BAD_DOMAIN.search(label) or len(label) > 63:
                return False

        return True

    def _address_list_relaxed(self):
        '''
        Grammar: address-list-relaxed -> address { delimiter address }

        Returns a pair (addresses, unparsable). Text that can not be
        parsed is skipped up to the next synchronization point and
        collected in `unparsable` instead of aborting the parse. The
        early exits below return a plain empty list rather than an
        AddressList as the first element.
        '''
        addrs = flanker.addresslib.address.AddressList()
        unparsable = []

        # address
        addr = self._address()
        if addr is None:
            # synchronize to the next delimiter (or end of line)
            # append the skipped over text to the unparsable list
            skip = self.stream.synchronize()
            if skip:
                unparsable.append(skip)

            # if no mailbox and end of stream, we were unable to parse
            # anything, return the unparsable stream
            if self.stream.end_of_stream():
                return [], unparsable
        else:
            # if we found a delimiter or end of stream, we have a
            # valid mailbox, add it
            if self.stream.peek(DELIMITER) or self.stream.end_of_stream():
                addrs.append(addr)
            else:
                # otherwise synchronize and add it to the unparsable
                # array, together with everything that preceded the
                # skipped text
                skip = self.stream.synchronize()
                if skip:
                    pre = self.stream.stream[:self.stream.stream.index(skip)]
                    unparsable.append(pre + skip)
                # if we hit the end of the stream, the whole input was
                # unparsable
                if self.stream.end_of_stream():
                    return [], [self.stream.stream]

        while True:
            # delimiter
            dlm = self.stream.get_token(DELIMITER)
            if dlm is None:
                skip = self.stream.synchronize()
                if skip:
                    unparsable.append(skip)
                if self.stream.end_of_stream():
                    break

            # address; remember where it started so that a rejected
            # address can be reported in full
            start_pos = self.stream.position
            addr = self._address()
            if addr is None:
                skip = self.stream.synchronize()
                if skip:
                    unparsable.append(skip)

                if self.stream.end_of_stream():
                    break
            else:
                # if we found a delimiter or end of stream, we have a
                # valid mailbox, add it
                if self.stream.peek(DELIMITER) or \
                        self.stream.end_of_stream():
                    addrs.append(addr)
                else:
                    # otherwise synchronize and add it to the
                    # unparsable array
                    skip = self.stream.synchronize()
                    if skip:
                        sskip = self.stream.stream[
                            start_pos:self.stream.position]
                        unparsable.append(sskip)
                    # if we hit the end of the stream, return the results
                    if self.stream.end_of_stream():
                        return addrs, unparsable

        return addrs, unparsable

    def _address_list_strict(self):
        '''
        Grammar: address-list-strict -> address { delimiter address }

        Returns an AddressList; parsing stops at the first element that
        does not fit the grammar.
        '''
        addrs = flanker.addresslib.address.AddressList()

        # address
        addr = self._address()
        if addr is None:
            return addrs
        # NOTE(review): the first address is only kept when a delimiter
        # follows it, so a lone address yields an empty list - confirm
        # this is intended
        if self.stream.peek(DELIMITER):
            addrs.append(addr)

        while True:
            # delimiter
            dlm = self.stream.get_token(DELIMITER)
            if dlm is None:
                break

            # address
            addr = self._address()
            if addr is None:
                break
            addrs.append(addr)

        return addrs

    def _address(self):
        '''
        Grammar: address -> name-addr-rfc | name-addr-lax | addr-spec | url

        Returns the first alternative that parses, or None. The stream
        position is restored when an email address fails the post
        processing checks.
        '''
        start_pos = self.stream.position

        addr = self._name_addr_rfc() or self._name_addr_lax() or \
            self._addr_spec() or self._url()

        # if email address, check that it passes post processing checks
        if addr and isinstance(addr, flanker.addresslib.address.EmailAddress):
            if self._mailbox_post_processing_checks(addr.address) is False:
                # roll back
                self.stream.position = start_pos
                return None

        return addr

    def _url(self):
        '''
        Grammar: url -> url

        Returns a UrlAddress built from the URL token, or None.
        '''
        earl = self.stream.get_token(URL)
        if earl is None:
            return None
        return flanker.addresslib.address.UrlAddress(to_utf8(earl))

    def _name_addr_rfc(self):
        '''
        Grammar: name-addr-rfc -> [ display-name-rfc ] angle-addr-rfc

        Returns an EmailAddress, or None with the stream position
        restored.
        '''
        start_pos = self.stream.position

        # optional displayname
        dname = self._display_name_rfc()

        aaddr = self._angle_addr_rfc()
        if aaddr is None:
            # roll back
            self.stream.position = start_pos
            return None

        if dname:
            return flanker.addresslib.address.EmailAddress(dname, aaddr)
        return flanker.addresslib.address.EmailAddress(None, aaddr)

    def _display_name_rfc(self):
        '''
        Grammar: display-name-rfc -> [ whitespace ] word { whitespace word }

        Returns the cleaned up display name, or None when no word could be
        read. The position is not restored here on failure; the caller
        (_name_addr_rfc) rolls back when the following angle-addr fails.
        '''
        wrds = []

        # optional whitespace
        self._whitespace()

        # word
        wrd = self._word()
        if wrd is None:
            return None
        wrds.append(wrd)

        while True:
            # whitespace
            wtsp = self._whitespace()
            if wtsp is None:
                break
            wrds.append(wtsp)

            # word
            wrd = self._word()
            if wrd is None:
                break
            wrds.append(wrd)

        return cleanup_display_name(''.join(wrds))

    def _angle_addr_rfc(self):
        '''
        Grammar: angle-addr-rfc ->
            [ whitespace ] < addr-spec > [ whitespace ]

        Returns the addr-spec as a string, or None with the stream
        position restored.
        '''
        start_pos = self.stream.position

        # optional whitespace
        self._whitespace()

        # left angle bracket
        if self.stream.get_token(LBRACKET) is None:
            # rollback
            self.stream.position = start_pos
            return None

        # addr-spec
        aspec = self._addr_spec(True)
        if aspec is None:
            # rollback
            self.stream.position = start_pos
            return None

        # right angle bracket
        if self.stream.get_token(RBRACKET) is None:
            # rollback
            self.stream.position = start_pos
            return None

        # optional whitespace
        self._whitespace()

        return aspec

    def _name_addr_lax(self):
        '''
        Grammar: name-addr-lax -> [ display-name-lax ] angle-addr-lax

        Returns an EmailAddress, or None with the stream position
        restored.
        '''
        start_pos = self.stream.position

        # optional displayname
        dname = self._display_name_lax()

        aaddr = self._angle_addr_lax()
        if aaddr is None:
            # roll back
            self.stream.position = start_pos
            return None

        if dname:
            return flanker.addresslib.address.EmailAddress(dname, aaddr)
        return flanker.addresslib.address.EmailAddress(None, aaddr)

    def _display_name_lax(self):
        '''
        Grammar: display-name-lax ->
            [ whitespace ] word { whitespace word } whitespace

        Returns the cleaned up display name, or None with the stream
        position restored. A word that is not followed by whitespace is
        handed back to the stream, since it is presumably the start of
        the address that follows.
        '''
        start_pos = self.stream.position
        wrds = []

        # optional whitespace
        self._whitespace()

        # word
        wrd = self._word()
        if wrd is None:
            # roll back
            self.stream.position = start_pos
            return None
        wrds.append(wrd)

        # peek to see if we have a whitespace,
        # if we don't, we have an invalid display-name
        # NOTE(review): both patterns have to match here; presumably
        # UNI_WHITE also matches ASCII whitespace - confirm
        if self.stream.peek(WHITESPACE) is None or \
                self.stream.peek(UNI_WHITE) is None:
            self.stream.position = start_pos
            return None

        while True:
            # whitespace
            wtsp = self._whitespace()
            if wtsp:
                wrds.append(wtsp)

            # if we need to roll back the next word
            start_pos = self.stream.position

            # word
            wrd = self._word()
            if wrd is None:
                self.stream.position = start_pos
                break
            wrds.append(wrd)

            # peek to see if we have a whitespace
            # if we don't, pop off the last word and break
            if self.stream.peek(WHITESPACE) is None or \
                    self.stream.peek(UNI_WHITE) is None:
                # roll back last word
                self.stream.position = start_pos
                wrds.pop()
                break

        return cleanup_display_name(''.join(wrds))

    def _angle_addr_lax(self):
        '''
        Grammar: angle-addr-lax -> addr-spec [ whitespace ]

        Returns the addr-spec as a string, or None with the stream
        position restored.
        '''
        start_pos = self.stream.position

        # addr-spec
        aspec = self._addr_spec(True)
        if aspec is None:
            # rollback
            self.stream.position = start_pos
            return None

        # optional whitespace
        self._whitespace()

        return aspec

    def _addr_spec(self, as_string=False):
        '''
        Grammar: addr-spec ->
            [ whitespace ] local-part @ domain [ whitespace ]

        Returns the cleaned up address as a string when `as_string` is
        true and wrapped in an EmailAddress otherwise; None with the
        stream position restored when the grammar does not match.
        '''
        start_pos = self.stream.position

        # optional whitespace
        self._whitespace()

        lpart = self._local_part()
        if lpart is None:
            # rollback
            self.stream.position = start_pos
            return None

        asym = self.stream.get_token(AT_SYMBOL)
        if asym is None:
            # rollback
            self.stream.position = start_pos
            return None

        domn = self._domain()
        if domn is None:
            # rollback
            self.stream.position = start_pos
            return None

        # optional whitespace
        self._whitespace()

        aspec = cleanup_email(''.join([lpart, asym, domn]))
        if as_string:
            return aspec
        return flanker.addresslib.address.EmailAddress(None, aspec)

    def _local_part(self):
        "Grammar: local-part -> dot-atom | quoted-string"
        return self.stream.get_token(DOT_ATOM) or \
            self.stream.get_token(QSTRING)

    def _domain(self):
        "Grammar: domain -> dot-atom"
        return self.stream.get_token(DOT_ATOM)

    def _word(self):
        '''
        Grammar: word -> word-ascii | word-unicode

        An ASCII word is only accepted when no unicode atom follows it
        directly; otherwise the position is rewound and the word is read
        as unicode.
        '''
        start_pos = self.stream.position

        # ascii word
        ascii_wrd = self._word_ascii()
        if ascii_wrd and not self.stream.peek(UNI_ATOM):
            return ascii_wrd

        # didn't get an ascii word, rollback to try again
        self.stream.position = start_pos

        # unicode word
        return self._word_unicode()

    def _word_ascii(self):
        '''
        Grammar: word-ascii -> atom | qstring

        Returns the word, or None when nothing matched or the word holds
        control characters (the position is not restored in that case).
        '''
        wrd = self.stream.get_token(RELAX_ATOM) or \
            self.stream.get_token(QSTRING)
        if wrd and not contains_control_chars(wrd):
            return wrd

        return None

    def _word_unicode(self):
        '''
        Grammar: word-unicode -> unicode-atom | unicode-qstring

        Returns the unicode atom as is, or the quoted string re-encoded
        and wrapped in double quotes; None with the stream position
        restored when neither matches.
        '''
        start_pos = self.stream.position

        # unicode atom
        uwrd = self.stream.get_token(UNI_ATOM)
        if uwrd and isinstance(uwrd, unicode) and \
                not contains_control_chars(uwrd):
            return uwrd

        # unicode qstr
        uwrd = self.stream.get_token(UNI_QSTR, 'qstr')
        if uwrd and isinstance(uwrd, unicode) and \
                not contains_control_chars(uwrd):
            return u'"{0}"'.format(encode_string(None, uwrd))

        # rollback
        self.stream.position = start_pos
        return None

    def _whitespace(self):
        "Grammar: whitespace -> whitespace-ascii | whitespace-unicode"
        return self._whitespace_ascii() or self._whitespace_unicode()

    def _whitespace_ascii(self):
        "Grammar: whitespace-ascii -> whitespace-ascii"
        return self.stream.get_token(WHITESPACE)

    def _whitespace_unicode(self):
        '''
        Grammar: whitespace-unicode -> whitespace-unicode

        Only whitespace that is not pure ASCII is returned; anything else
        yields None.
        '''
        uwhite = self.stream.get_token(UNI_WHITE)
        if uwhite and not is_pure_ascii(uwhite):
            return uwhite
        return None