def get_file_format(the_line):
    """Build a file_format object describing the layout of one line.

    The line is split with parse(); the resulting object records how many
    fields were found and which one holds the email address.

    the_line -- a raw line of text from the input file.

    Returns a file_format whose ``field_count`` is the number of parsed
    fields and whose ``email_field`` is the 0-based index of the first field
    accepted by validate_email(). If no field validates, ``email_field`` is 0
    when the line has exactly one field and -1 otherwise.
    """
    fields = parse(the_line)
    debug(fields)

    # Index of the first field that looks like an email, or -1 if none does.
    email_field = -1
    for index, field in enumerate(fields):
        if validate_email(field):
            email_field = index
            break

    ff = file_format()
    ff.field_count = len(fields)
    ff.email_field = email_field
    if ff.email_field == -1 and len(fields) == 1:
        # only one field, assume email
        ff.email_field = 0
    return ff
def _resolve_file_format(first_line):
    """Return the file_format for first_line, asking the user if needed.

    When get_file_format() cannot locate the email column (email_field is
    -1), the parsed fields are shown to the user via
    ask_user_which_field_is_email() and the answer is stored instead.
    """
    ff = get_file_format(first_line)
    if ff.email_field == -1:
        # get it from user
        chosen = ask_user_which_field_is_email(parse(first_line))
        ff.email_field = chosen - 1  # compensate for 0-based indexing
    return ff


def process_file(infile):
    """Clean and validate the email column of infile, writing the good rows.

    Every line of ``infile`` is parsed; the email field is passed through
    clean_email() and then validate_email(). Lines whose cleaned email is
    valid are written to the file named by outfile_name(infile), with the
    original email text replaced by the cleaned one. Invalid lines are
    dropped. When config['has_header'] is set, the first line is copied to
    the output unchanged and is not counted as an email.

    If the 'overwrite_files' option is set, the output file is renamed over
    ``infile`` once processing has finished.

    infile -- path of the file to process.

    Returns a ProcessedFile with ``good_emails``, ``bad_emails`` and
    ``total_emails`` filled in.
    """
    has_header = config['has_header']
    good = 0
    bad = 0
    cleaned = 0  # NOTE(review): counted but never reported on pf, as before
    total = 0
    header_seen = False
    ff = None  # file_format obj, resolved from the first line
    pf = ProcessedFile()
    outfile = outfile_name(infile)

    # 'with' guarantees both streams are closed even if a row raises.
    # 'a' replaces the original 'aw': that mode string is rejected by
    # Python 3, and its leading 'a' is what selected append mode.
    with open(infile, 'r') as instream:
        with open(outfile, 'a') as outstream:
            for line in instream:
                total += 1

                if total == 1:
                    # Resolve the layout from the first line whether or not
                    # it is a header; previously a headerless file left ff
                    # as None and failed on the first data row.
                    ff = _resolve_file_format(line)
                    if has_header:
                        header_seen = True
                        outstream.write(line)  # write header
                        continue

                fields = parse(line, ff)
                if ff.email_field >= len(fields):
                    # Row is too short to contain the email column (e.g. a
                    # blank line): count it as bad rather than crash.
                    bad += 1
                    continue

                original = fields[ff.email_field]
                email = clean_email(original)
                # Single-argument form prints the same text under Python 2
                # and Python 3.
                print("%s  --  %s" % (total, len(fields)))

                if not validate_email(email):
                    bad += 1
                    continue

                good += 1
                if email != original:
                    cleaned += 1
                    line = line.replace(original, email)
                # TODO: need to get the line back from fields, but for now
                # just write the line
                outstream.write(line)

    pf.good_emails = good
    pf.bad_emails = bad
    # Only discount the header if one was actually read; an empty file used
    # to report -1 here.
    pf.total_emails = total - 1 if header_seen else total

    ow = get_opt('overwrite_files')
    if ow:
        os.rename(outfile, infile)
    return pf