def parse_csv(myfile, newsletter, ignore_errors=False):
    """
    Parse a CSV file with names and e-mail addresses into subscriptions.

    The encoding of `myfile` is detected with chardet and the CSV dialect
    with csv.Sniffer. The first row is used as a header to locate the name
    and e-mail columns; each following row is passed through check_name(),
    check_email() and make_subscription().

    Returns a dictionary mapping each e-mail address to the value returned
    by make_subscription() for it.

    Raises forms.ValidationError when the name or e-mail column cannot be
    determined. Unless `ignore_errors` is true, it is also raised for rows
    that are too short, have an invalid e-mail address, duplicate an earlier
    address or yield no subscription. With `ignore_errors` such rows are
    skipped instead.
    """
    from newsletter.addressimport.csv_util import UnicodeReader
    import codecs
    import csv

    # Detect encoding
    from chardet.universaldetector import UniversalDetector

    detector = UniversalDetector()
    for line in myfile.readlines():
        detector.feed(line)
        if detector.done:
            break
    detector.close()
    charset = detector.result["encoding"]

    # Reset the file index
    myfile.seek(0)

    # Attempt to detect the dialect
    encodedfile = codecs.EncodedFile(myfile, charset)
    dialect = csv.Sniffer().sniff(encodedfile.read(1024))

    # Reset the file index
    myfile.seek(0)

    myreader = UnicodeReader(myfile, dialect=dialect, encoding=charset)
    firstrow = myreader.next()

    # Find name column: the last header containing 'name' wins, unless a
    # header that also contains 'display' is met, which ends the search.
    namecol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if "name" in header or ugettext("name") in header:
            namecol = colnum

            if "display" in header or ugettext("display") in header:
                break

    if namecol is None:
        raise forms.ValidationError(
            _("Name column not found. The name of this column should be either 'name' or '%s'.")
            % ugettext("name")
        )

    logger.debug("Name column found: '%s'", firstrow[namecol])

    # Find email column: the first matching header wins.
    mailcol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if "email" in header or "e-mail" in header or ugettext("e-mail") in header:
            mailcol = colnum
            break

    if mailcol is None:
        raise forms.ValidationError(
            _("E-mail column not found. The name of this column should be either 'email', 'e-mail' or '%s'.")
            % ugettext("e-mail")
        )

    logger.debug("E-mail column found: '%s'", firstrow[mailcol])

    # A single header can match both searches; name and e-mail must come
    # from different columns.
    if namecol == mailcol:
        raise forms.ValidationError(
            _(
                "Could not properly determine the proper columns in the CSV-file. There should be a field called 'name' or '%(name)s' and one called 'e-mail' or '%(e-mail)s'."
            )
            % {"name": _("name"), "e-mail": _("e-mail")}
        )

    logger.debug("Extracting data.")

    addresses = {}
    for row in myreader:
        # Short (or blank) rows would otherwise fail with a bare IndexError.
        if max(namecol, mailcol) >= len(row):
            if ignore_errors:
                # Skip this record
                continue
            raise forms.ValidationError(
                _("Row with content '%(row)s' does not contain a name and email field.")
                % {"row": row}
            )

        name = check_name(row[namecol], ignore_errors)
        email = check_email(row[mailcol], ignore_errors)

        logger.debug("Going to add %s <%s>", name, email)

        if not email_re.search(email):
            if ignore_errors:
                # Skip this record. Falling through would use an `addr`
                # that is unbound or left over from the previous row.
                continue
            raise forms.ValidationError(
                _("Entry '%s' does not contain a valid e-mail address.") % name
            )

        addr = make_subscription(newsletter, email, name)

        if addr:
            if email in addresses and not ignore_errors:
                raise forms.ValidationError(
                    _("The address file contains duplicate entries for '%s'.") % email
                )
            addresses[email] = addr
        elif not ignore_errors:
            # make_subscription() returned nothing for this entry.
            raise forms.ValidationError(_("Some entries are already subscribed to."))

    return addresses
def parse_csv(myfile, newsletter, ignore_errors=False):
    """
    Parse a CSV file with names and e-mail addresses into subscriptions.

    The encoding of `myfile` is detected with chardet and the CSV dialect
    with csv.Sniffer. The first row is used as a header to locate the name
    and e-mail columns; each following row is passed through check_name(),
    check_email() and make_subscription().

    Returns a dictionary mapping each e-mail address to the value returned
    by make_subscription() for it.

    Raises forms.ValidationError when the name or e-mail column cannot be
    determined. Unless `ignore_errors` is true, it is also raised for rows
    that are too short, have an invalid e-mail address, duplicate an earlier
    address or yield no subscription. With `ignore_errors` these problems
    are only logged as warnings.
    """
    from newsletter.addressimport.csv_util import UnicodeReader
    import codecs
    import csv

    # Detect encoding
    from chardet.universaldetector import UniversalDetector

    detector = UniversalDetector()
    for line in myfile.readlines():
        detector.feed(line)
        if detector.done:
            break
    detector.close()
    charset = detector.result['encoding']

    # Reset the file index
    myfile.seek(0)

    # Attempt to detect the dialect
    encodedfile = codecs.EncodedFile(myfile, charset)
    dialect = csv.Sniffer().sniff(encodedfile.read(1024))

    # Reset the file index
    myfile.seek(0)

    logger.info('Detected encoding %s and dialect %s for CSV file', charset, dialect)

    myreader = UnicodeReader(myfile, dialect=dialect, encoding=charset)
    firstrow = myreader.next()

    # Find name column: the last header containing 'name' wins, unless a
    # header that also contains 'display' is met, which ends the search.
    namecol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if "name" in header or ugettext("name") in header:
            namecol = colnum

            if "display" in header or ugettext("display") in header:
                break

    if namecol is None:
        raise forms.ValidationError(
            _("Name column not found. The name of this column should be either 'name' or '%s'.")
            % ugettext("name")
        )

    logger.debug("Name column found: '%s'", firstrow[namecol])

    # Find email column: the first matching header wins.
    mailcol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if 'email' in header or 'e-mail' in header or ugettext("e-mail") in header:
            mailcol = colnum
            break

    if mailcol is None:
        raise forms.ValidationError(
            _("E-mail column not found. The name of this column should be either 'email', 'e-mail' or '%s'.")
            % ugettext("e-mail")
        )

    logger.debug("E-mail column found: '%s'", firstrow[mailcol])

    # A single header can match both searches; name and e-mail must come
    # from different columns.
    if namecol == mailcol:
        raise forms.ValidationError(
            _("Could not properly determine the proper columns in the CSV-file. There should be a field called 'name' or '%(name)s' and one called 'e-mail' or '%(e-mail)s'.")
            % {"name": _("name"), "e-mail": _("e-mail")}
        )

    logger.debug('Extracting data.')

    addresses = {}
    for row in myreader:
        if not max(namecol, mailcol) < len(row):
            logger.warning(
                "Column count does not match for row number %d",
                myreader.line_num, extra=dict(data={'row': row})
            )

            if ignore_errors:
                # Skip this record
                continue
            raise forms.ValidationError(
                _("Row with content '%s' does not contain a name and email field.") % row
            )

        name = check_name(row[namecol], ignore_errors)
        email = check_email(row[mailcol], ignore_errors)

        logger.debug("Going to add %s <%s>", name, email)

        if not email_re.search(email):
            if not ignore_errors:
                raise forms.ValidationError(
                    _("Entry '%s' does not contain a valid e-mail address.") % name
                )

            logger.warning(
                "Entry '%s' at line %d does not contain a valid e-mail address.",
                name, myreader.line_num, extra=dict(data={'row': row})
            )
            # Skip this record. Falling through would use an `addr` that is
            # unbound or left over from the previous row.
            continue

        addr = make_subscription(newsletter, email, name)

        if addr:
            if email in addresses:
                logger.warning(
                    "Entry '%s' at line %d contains a duplicate entry for '%s'",
                    name, myreader.line_num, email, extra=dict(data={'row': row})
                )

                if not ignore_errors:
                    raise forms.ValidationError(
                        _("The address file contains duplicate entries for '%s'.") % email
                    )

            # When errors are ignored a later duplicate replaces the earlier one.
            addresses[email] = addr
        else:
            logger.warning(
                "Entry '%s' at line %d is already subscribed to with email '%s'",
                name, myreader.line_num, email, extra=dict(data={'row': row})
            )

            if not ignore_errors:
                raise forms.ValidationError(_("Some entries are already subscribed to."))

    return addresses
def parse_csv(myfile, newsletter, ignore_errors=False):
    """
    Parse a CSV file with names and e-mail addresses into subscriptions.

    The encoding of `myfile` is detected with chardet and the CSV dialect
    with csv.Sniffer. The first row is used as a header to locate the name
    and e-mail columns; each following row is passed through check_name(),
    check_email() and make_subscription().

    Returns a dictionary mapping each e-mail address to the value returned
    by make_subscription() for it.

    Raises forms.ValidationError when the name or e-mail column cannot be
    determined. Unless `ignore_errors` is true, it is also raised for rows
    that are too short, have an invalid e-mail address, duplicate an earlier
    address or yield no subscription. With `ignore_errors` these problems
    are only logged as warnings.
    """
    from newsletter.addressimport.csv_util import UnicodeReader
    import codecs
    import csv

    # Detect encoding
    from chardet.universaldetector import UniversalDetector

    detector = UniversalDetector()
    for line in myfile.readlines():
        detector.feed(line)
        if detector.done:
            break
    detector.close()
    charset = detector.result['encoding']

    # Reset the file index
    myfile.seek(0)

    # Attempt to detect the dialect
    encodedfile = codecs.EncodedFile(myfile, charset)
    dialect = csv.Sniffer().sniff(encodedfile.read(1024))

    # Reset the file index
    myfile.seek(0)

    logger.info('Detected encoding %s and dialect %s for CSV file',
                charset, dialect)

    myreader = UnicodeReader(myfile, dialect=dialect, encoding=charset)
    firstrow = myreader.next()

    # Find name column: the last header containing 'name' wins, unless a
    # header that also contains 'display' is met, which ends the search.
    namecol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if "name" in header or ugettext("name") in header:
            namecol = colnum

            if "display" in header or ugettext("display") in header:
                break

    if namecol is None:
        raise forms.ValidationError(_(
            "Name column not found. The name of this column should be "
            "either 'name' or '%s'.") % ugettext("name")
        )

    logger.debug("Name column found: '%s'", firstrow[namecol])

    # Find email column: the first matching header wins.
    mailcol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if 'email' in header or \
                'e-mail' in header or \
                ugettext("e-mail") in header:
            mailcol = colnum
            break

    if mailcol is None:
        raise forms.ValidationError(_(
            "E-mail column not found. The name of this column should be "
            "either 'email', 'e-mail' or '%(email)s'.") %
            {'email': ugettext("e-mail")}
        )

    logger.debug("E-mail column found: '%s'", firstrow[mailcol])

    # A single header can match both searches; name and e-mail must come
    # from different columns.
    if namecol == mailcol:
        raise forms.ValidationError(
            _(
                "Could not properly determine the proper columns in the "
                "CSV-file. There should be a field called 'name' or "
                "'%(name)s' and one called 'e-mail' or '%(e-mail)s'."
            ) % {
                "name": _("name"),
                "e-mail": _("e-mail")
            }
        )

    logger.debug('Extracting data.')

    addresses = {}
    for row in myreader:
        if not max(namecol, mailcol) < len(row):
            logger.warning(
                "Column count does not match for row number %d",
                myreader.line_num, extra=dict(data={'row': row})
            )

            if ignore_errors:
                # Skip this record
                continue
            raise forms.ValidationError(_(
                "Row with content '%(row)s' does not contain a name and "
                "email field.") % {'row': row}
            )

        name = check_name(row[namecol], ignore_errors)
        email = check_email(row[mailcol], ignore_errors)

        logger.debug("Going to add %s <%s>", name, email)

        if not email_re.search(email):
            if not ignore_errors:
                raise forms.ValidationError(_(
                    "Entry '%s' does not contain a valid "
                    "e-mail address.") % name
                )

            logger.warning(
                "Entry '%s' at line %d does not contain a valid "
                "e-mail address.",
                name, myreader.line_num, extra=dict(data={'row': row})
            )
            # Skip this record. Falling through would use an `addr` that is
            # unbound or left over from the previous row.
            continue

        addr = make_subscription(newsletter, email, name)

        if addr:
            if email in addresses:
                logger.warning(
                    "Entry '%s' at line %d contains a "
                    "duplicate entry for '%s'",
                    name, myreader.line_num, email,
                    extra=dict(data={'row': row})
                )

                if not ignore_errors:
                    raise forms.ValidationError(_(
                        "The address file contains duplicate entries "
                        "for '%s'.") % email)

            # When errors are ignored a later duplicate replaces the earlier one.
            addresses[email] = addr
        else:
            logger.warning(
                "Entry '%s' at line %d is already subscribed to "
                "with email '%s'",
                name, myreader.line_num, email,
                extra=dict(data={'row': row})
            )

            if not ignore_errors:
                raise forms.ValidationError(
                    _("Some entries are already subscribed to."))

    return addresses