def random_filename(fnamelength=8, ext_prob=1):
    """Build a random file name, optionally followed by an extension.

    The base name is ``random_string(fnamelength)``.  A dot plus
    ``random_string(3)`` is appended when a ``random.random()`` draw is
    below ``ext_prob``; otherwise the bare base name is returned.
    """
    base = random_string(fnamelength)
    # The draw happens after the base name and before the extension, so the
    # sequence of random calls is the same whichever branch is taken.
    suffix = '.' + random_string(3) if random.random() < ext_prob else ''
    return base + suffix
def create_fake_email_contents(n_headers=5):
    """Generate the text of a fake email: header lines, a blank line, a body.

    Each entry of ``GENERATED_EMAIL_HEADERS`` contributes one header line
    when its random draw falls below the entry's probability.  These are
    followed by ``n_headers`` filler headers made of random strings, an
    empty separator line, and a random body.  Lines are joined with a
    newline character.
    """
    # Known headers: draw first, call the generator only if the draw passes.
    known = [
        header + generator()
        for header, prob, generator in GENERATED_EMAIL_HEADERS
        if random.random() < prob
    ]
    # Filler headers in "name: value" form.
    filler = [
        (random_string(20, chars=FNAMECHARS)
         + ': '
         + random_string(30, FNAMECHARS))
        for _ in range(n_headers)
    ]
    body = random_string(200, chars=FCONTENTSCHARS)
    # The empty string becomes the blank line between headers and body.
    return '\n'.join(known + filler + ['', body])
def create_fake_email_contents(n_headers=5):
    """Return fake email text made of headers, an empty line, and a body.

    Headers listed in ``GENERATED_EMAIL_HEADERS`` are each included when a
    ``random.random()`` draw is below the probability stored with them.
    ``n_headers`` further headers with random names and values follow.
    The result is the newline-joined sequence of all lines.
    """
    lines = []

    # Headers described by the (prefix, probability, value factory) triples.
    for prefix, probability, make_value in GENERATED_EMAIL_HEADERS:
        if not random.random() < probability:
            continue
        lines.append(prefix + make_value())

    # Extra headers with random names and values.
    for _ in range(n_headers):
        name = random_string(20, chars=FNAMECHARS)
        value = random_string(30, FNAMECHARS)
        lines.append(name + ': ' + value)

    # Empty separator line first, then the random body.
    lines.extend(['', random_string(200, chars=FCONTENTSCHARS)])
    return '\n'.join(lines)
def create_corpus_dictionary(nitems=NEMAILS):
    """Return a dict mapping random file names to random contents.

    ``nitems`` (name, contents) pairs are generated.  If a file name is
    generated more than once, the later contents replace the earlier ones.
    """
    # Tuples are evaluated left to right, so the file name is drawn before
    # the contents on every iteration.
    return dict(
        (random_filename(), random_string(200, chars=FCONTENTSCHARS))
        for _ in range(nitems)
    )
def create_corpus_dictionary(nitems=N_EMAILS):
    """Return a dict mapping random file names to random contents.

    The loop runs ``nitems`` times.  A file name that repeats an earlier
    one overwrites that earlier entry.
    """
    corpus = {}
    for _ in range(nitems):
        # Draw the name on its own line: in ``d[key()] = value()`` Python
        # would evaluate the value first and change the order of draws.
        name = random_filename()
        corpus[name] = random_string(200, chars=FCONTENTS_CHARS)
    return corpus
def random_email_address(namelength=5, domain2length=7, domain1length=3):
    """Return a random address of the form ``name@domain2.domain1``.

    Each of the three parts is ``random_string(<length>, LCCHARS)`` with
    the matching length argument.
    """
    # Parts are drawn left to right, in the order they appear in the result.
    local_part = random_string(namelength, LCCHARS)
    second_level = random_string(domain2length, LCCHARS)
    top_level = random_string(domain1length, LCCHARS)
    return local_part + '@' + second_level + '.' + top_level
"""Create a random dictionary of email file names and their contents.""" d = {} for i in range(nitems): filename = random_filename() contents = create_fake_email_contents() d[filename] = contents return d # The following structure contains a triple for each header: # (header string, probability of generating this header, function used to generate the contents) GENERATED_EMAIL_HEADERS = [('Date: ', 1.0, random_date), ('From: ', 1.0, random_email_address), ('To: ', 0.5, random_email_address), ('Subject: ', 0.5, lambda: random_string(30, chars=FNAMECHARS + ' '))] def create_fake_email_contents(n_headers=5): """Create email contents similar to what is requested by email RFC.""" contents = [] # Create required headers for header, prob, generator in GENERATED_EMAIL_HEADERS: if random.random() < prob: contents.append(header + generator()) # Create additional fake headers for i in range(n_headers): contents.append( random_string(20, chars=FNAMECHARS) + ': ' + random_string(30, FNAMECHARS)) # Add empty line separating headers and body
def create_corpus_dictionary(nitems=N_EMAILS):
    """Create a random dictionary of email file names and their contents.

    Runs ``nitems`` iterations, each storing the result of
    ``create_fake_email_contents()`` under a key from ``random_filename()``.
    A repeated file name overwrites the earlier entry.
    """
    d = {}
    for i in range(nitems):
        filename = random_filename()
        contents = create_fake_email_contents()
        d[filename] = contents
    return d


# The following structure contains a triple for each header:
# (header string, probability of generating this header, function used to generate the contents)
GENERATED_EMAIL_HEADERS = [
    ('Date: ', 1.0, random_date),
    ('From: ', 1.0, random_email_address),
    ('To: ', 0.5, random_email_address),
    ('Subject: ', 0.5, lambda: random_string(30, chars=FNAMECHARS+' '))
    ]


def create_fake_email_contents(n_headers=5):
    """Create email contents similar to what is requested by email RFC.

    Header lines come first: one per GENERATED_EMAIL_HEADERS entry whose
    random draw is below its probability, then ``n_headers`` headers built
    from random strings.  An empty line follows the headers.
    """
    contents = []
    # Create required headers
    for header, prob, generator in GENERATED_EMAIL_HEADERS:
        # The generator is called only when the draw passes.
        if random.random() < prob:
            contents.append(header + generator())
    # Create additional fake headers
    for i in range(n_headers):
        contents.append(random_string(20, chars=FNAMECHARS) + ': ' + random_string(30, FNAMECHARS))
    # Add empty line separating headers and body
    contents.append('')
    # Add body
    # NOTE(review): the function is cut off here in the visible source; the
    # body-generation and return statements are not shown -- confirm against
    # the full file.