Example #1
0
def get_file_format(the_line):
	"""Describe the layout of one sample line from an input file.

	The line is split with parse(). The index of the first field accepted by
	validate_email() becomes ``email_field`` and the number of fields becomes
	``field_count``.

	the_line -- a single raw line of the file.

	Returns a file_format object. ``email_field`` is -1 when no field
	validates, except that a line consisting of exactly one field is assumed
	to be the e-mail column (index 0).
	"""
	fields = parse(the_line)
	debug(fields)

	# Index of the first field that looks like an e-mail, or -1 if none does.
	email_field = -1
	for index, field in enumerate(fields):
		if validate_email(field):
			email_field = index
			break

	ff = file_format()
	ff.field_count = len(fields)
	ff.email_field = email_field

	if ff.email_field == -1 and len(fields) == 1: #only one field, assume email
		ff.email_field = 0

	return ff
Example #2
0
def _detect_file_format(line):
	"""Return the file_format for `line`, asking the user when no e-mail field is found."""
	ff = get_file_format(line)
	if ff.email_field == -1:
		#get it from user
		fields = parse(line)
		ff.email_field = ask_user_which_field_is_email(fields) - 1 #comp for 0 index
	return ff


def process_file(infile):
	"""Validate the e-mail column of `infile` and write the good lines out.

	The first line is used to work out which field holds the e-mail address
	(asking the user if it cannot be detected). When config['has_header'] is
	set, that first line is copied to the output unchanged and is not counted.
	Every other line is split with parse(); its e-mail field is passed through
	clean_email() and validate_email(). Lines whose e-mail validates are
	appended to the file named by outfile_name(infile), with the cleaned
	address substituted when cleaning changed it. All other lines are dropped
	and counted as bad, including lines too short to have an e-mail field.

	If the 'overwrite_files' option is set, the output file is renamed over
	`infile` once both streams are closed.

	infile -- path of the file to process.

	Returns a ProcessedFile with good_emails, bad_emails and total_emails
	(header line excluded) filled in.
	"""
	good = 0
	bad = 0
	cleaned = 0 # NOTE(review): counted but never reported on the ProcessedFile
	total = 0
	header_lines = 0 #only subtracted from the total if a header was really read
	pf = ProcessedFile()

	outfile = outfile_name(infile)
	ff = None #file_format obj, detected from the first line

	# 'with' closes both streams even when a line raises part-way through.
	# Mode 'a' replaces the invalid 'aw'; output is appended to an existing file.
	with open(infile, 'r') as instream, open(outfile, 'a') as outstream:
		for line in instream:
			total += 1

			if ff is None:
				# First line: detect the layout whether or not it is a header,
				# so header-less files no longer reach the code below with ff unset.
				ff = _detect_file_format(line)
				if config['has_header']:
					header_lines = 1
					outstream.write(line) #write header
					continue

			fields = parse(line, ff)
			try:
				raw_email = fields[ff.email_field]
			except IndexError:
				# Short or blank line: there is no e-mail field to check.
				bad += 1
				continue
			email = clean_email(raw_email)

			# Single-argument form prints the same text under Python 2 and 3.
			print("%s  --  %s" % (total, len(fields)))

			if not validate_email(email):
				bad += 1
				continue

			good += 1
			if email != raw_email:
				cleaned += 1
				line = line.replace(raw_email, email)

			#TODO: need to get the line back from fields, but for now just write the line
			outstream.write(line)

	pf.good_emails = good
	pf.bad_emails = bad
	pf.total_emails = total - header_lines

	if get_opt('overwrite_files'):
		os.rename(outfile, infile)

	return pf