def parse_csv(myfile, newsletter, ignore_errors=False):
    """
    Parse a CSV file with names and e-mail addresses into subscriptions.

    The character encoding is detected with chardet and the CSV dialect is
    sniffed from the first 1024 bytes read from the file. The first row is
    treated as a header in which the name and e-mail columns are located by
    (translated) keyword; every following row is handed to
    ``make_subscription``.

    Arguments:
        myfile: seekable file-like object containing the CSV data.
        newsletter: passed through unchanged to ``make_subscription``.
        ignore_errors: when true, rows that cannot be imported are skipped
            instead of aborting the import. The flag is also forwarded to
            ``check_name`` and ``check_email``.

    Returns:
        A dict mapping each e-mail address to the truthy value returned by
        ``make_subscription`` for that address.

    Raises:
        forms.ValidationError: when the name or e-mail column cannot be
            determined, or (unless ``ignore_errors`` is set) when a row is
            too short, has an invalid e-mail address, is a duplicate, or
            ``make_subscription`` returns a falsy value.
    """
    from newsletter.addressimport.csv_util import UnicodeReader
    import codecs
    import csv

    # Detect encoding
    from chardet.universaldetector import UniversalDetector

    detector = UniversalDetector()
    for line in myfile.readlines():
        detector.feed(line)
        if detector.done:
            break
    detector.close()
    charset = detector.result["encoding"]

    # Rewind, sniff the dialect from the start of the file, then rewind
    # again so the reader starts at the header row.
    myfile.seek(0)
    encodedfile = codecs.EncodedFile(myfile, charset)
    dialect = csv.Sniffer().sniff(encodedfile.read(1024))
    myfile.seek(0)

    myreader = UnicodeReader(myfile, dialect=dialect, encoding=charset)

    firstrow = next(myreader)

    # Find name column: the last header containing "name" wins, unless a
    # "display name" style header is met, which ends the search at once.
    namecol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if "name" in header or ugettext("name") in header:
            namecol = colnum

            if "display" in header or ugettext("display") in header:
                break

    if namecol is None:
        raise forms.ValidationError(
            _("Name column not found. The name of this column should be either 'name' or '%s'.") % ugettext("name")
        )

    logger.debug("Name column found: '%s'", firstrow[namecol])

    # Find email column: the first matching header wins.
    mailcol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if "email" in header or "e-mail" in header or ugettext("e-mail") in header:
            mailcol = colnum
            break

    if mailcol is None:
        raise forms.ValidationError(
            _("E-mail column not found. The name of this column should be either 'email', 'e-mail' or '%s'.")
            % ugettext("e-mail")
        )

    logger.debug("E-mail column found: '%s'", firstrow[mailcol])

    # A single header matching both keywords cannot serve as both columns.
    if namecol == mailcol:
        raise forms.ValidationError(
            _(
                "Could not properly determine the proper columns in the CSV-file. There should be a field called 'name' or '%(name)s' and one called 'e-mail' or '%(e-mail)s'."
            )
            % {"name": _("name"), "e-mail": _("e-mail")}
        )

    logger.debug("Extracting data.")

    addresses = {}
    for row in myreader:
        # A row shorter than the header would otherwise raise IndexError.
        if max(namecol, mailcol) >= len(row):
            if ignore_errors:
                continue
            raise forms.ValidationError(
                _("Row with content '%s' does not contain a name and email field.") % row
            )

        name = check_name(row[namecol], ignore_errors)
        email = check_email(row[mailcol], ignore_errors)

        logger.debug("Going to add %s <%s>", name, email)

        if not email_re.search(email):
            if ignore_errors:
                # Skip the row; falling through would use an unbound addr or
                # the subscription left over from the previous row.
                continue
            raise forms.ValidationError(_("Entry '%s' does not contain a valid e-mail address.") % name)

        addr = make_subscription(newsletter, email, name)

        if addr:
            if email in addresses and not ignore_errors:
                raise forms.ValidationError(_("The address file contains duplicate entries for '%s'.") % email)

            addresses[email] = addr
        elif not ignore_errors:
            # A falsy result is reported as an existing subscription.
            raise forms.ValidationError(_("Some entries are already subscribed to."))

    return addresses
# Example no. 2
# 0
def parse_csv(myfile, newsletter, ignore_errors=False):
    """
    Parse a CSV file with names and e-mail addresses into subscriptions.

    The character encoding is detected with chardet and the CSV dialect is
    sniffed from the first 1024 bytes read from the file. The first row is
    treated as a header in which the name and e-mail columns are located by
    (translated) keyword; every following row is handed to
    ``make_subscription``.

    Arguments:
        myfile: seekable file-like object containing the CSV data.
        newsletter: passed through unchanged to ``make_subscription``.
        ignore_errors: when true, problematic rows are logged and skipped
            instead of aborting the import. The flag is also forwarded to
            ``check_name`` and ``check_email``.

    Returns:
        A dict mapping each e-mail address to the truthy value returned by
        ``make_subscription`` for that address.

    Raises:
        forms.ValidationError: when the name or e-mail column cannot be
            determined, or (unless ``ignore_errors`` is set) when a row is
            too short, has an invalid e-mail address, is a duplicate, or
            ``make_subscription`` returns a falsy value.
    """
    from newsletter.addressimport.csv_util import UnicodeReader
    import codecs
    import csv

    # Detect encoding
    from chardet.universaldetector import UniversalDetector

    detector = UniversalDetector()
    for line in myfile.readlines():
        detector.feed(line)
        if detector.done:
            break
    detector.close()
    charset = detector.result['encoding']

    # Rewind, sniff the dialect from the start of the file, then rewind
    # again so the reader starts at the header row.
    myfile.seek(0)
    encodedfile = codecs.EncodedFile(myfile, charset)
    dialect = csv.Sniffer().sniff(encodedfile.read(1024))
    myfile.seek(0)

    logger.info('Detected encoding %s and dialect %s for CSV file',
                charset, dialect)

    myreader = UnicodeReader(myfile, dialect=dialect, encoding=charset)

    firstrow = next(myreader)

    # Find name column: the last header containing "name" wins, unless a
    # "display name" style header is met, which ends the search at once.
    namecol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if "name" in header or ugettext("name") in header:
            namecol = colnum

            if "display" in header or ugettext("display") in header:
                break

    if namecol is None:
        raise forms.ValidationError(_("Name column not found. The name of this column should be either 'name' or '%s'.") % ugettext("name"))

    logger.debug("Name column found: '%s'", firstrow[namecol])

    # Find email column: the first matching header wins.
    mailcol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if 'email' in header or 'e-mail' in header or ugettext("e-mail") in header:
            mailcol = colnum
            break

    if mailcol is None:
        raise forms.ValidationError(_("E-mail column not found. The name of this column should be either 'email', 'e-mail' or '%s'.") % ugettext("e-mail"))

    logger.debug("E-mail column found: '%s'", firstrow[mailcol])

    # A single header matching both keywords cannot serve as both columns.
    if namecol == mailcol:
        raise forms.ValidationError(_("Could not properly determine the proper columns in the CSV-file. There should be a field called 'name' or '%(name)s' and one called 'e-mail' or '%(e-mail)s'.") % {"name": _("name"), "e-mail": _("e-mail")})

    logger.debug('Extracting data.')

    addresses = {}
    for row in myreader:
        # A row shorter than the header would otherwise raise IndexError.
        if max(namecol, mailcol) >= len(row):
            logger.warning("Column count does not match for row number %d",
                           myreader.line_num, extra=dict(data={'row': row}))

            if ignore_errors:
                # Skip this record
                continue

            raise forms.ValidationError(_("Row with content '%s' does not contain a name and email field.") % row)

        name = check_name(row[namecol], ignore_errors)
        email = check_email(row[mailcol], ignore_errors)

        logger.debug("Going to add %s <%s>", name, email)

        if not email_re.search(email):
            if not ignore_errors:
                raise forms.ValidationError(_("Entry '%s' does not contain a valid e-mail address.") % name)

            logger.warning("Entry '%s' at line %d does not contain a valid e-mail address.",
                           name, myreader.line_num, extra=dict(data={'row': row}))

            # Skip the row; falling through would use an unbound addr or
            # the subscription left over from the previous row.
            continue

        addr = make_subscription(newsletter, email, name)

        if addr:
            if email in addresses:
                logger.warning("Entry '%s' at line %d contains a duplicate entry for '%s'",
                               name, myreader.line_num, email, extra=dict(data={'row': row}))

                if not ignore_errors:
                    raise forms.ValidationError(_("The address file contains duplicate entries for '%s'.") % email)

            addresses[email] = addr
        else:
            # A falsy result is reported as an existing subscription.
            logger.warning("Entry '%s' at line %d is already subscribed to with email '%s'",
                           name, myreader.line_num, email, extra=dict(data={'row': row}))

            if not ignore_errors:
                raise forms.ValidationError(_("Some entries are already subscribed to."))

    return addresses
def parse_csv(myfile, newsletter, ignore_errors=False):
    """
    Parse a CSV file with names and e-mail addresses into subscriptions.

    The character encoding is detected with chardet and the CSV dialect is
    sniffed from the first 1024 bytes read from the file. The first row is
    treated as a header in which the name and e-mail columns are located by
    (translated) keyword; every following row is handed to
    ``make_subscription``.

    Arguments:
        myfile: seekable file-like object containing the CSV data.
        newsletter: passed through unchanged to ``make_subscription``.
        ignore_errors: when true, problematic rows are logged and skipped
            instead of aborting the import. The flag is also forwarded to
            ``check_name`` and ``check_email``.

    Returns:
        A dict mapping each e-mail address to the truthy value returned by
        ``make_subscription`` for that address.

    Raises:
        forms.ValidationError: when the name or e-mail column cannot be
            determined, or (unless ``ignore_errors`` is set) when a row is
            too short, has an invalid e-mail address, is a duplicate, or
            ``make_subscription`` returns a falsy value.
    """
    from newsletter.addressimport.csv_util import UnicodeReader
    import codecs
    import csv

    # Detect encoding
    from chardet.universaldetector import UniversalDetector

    detector = UniversalDetector()

    for line in myfile.readlines():
        detector.feed(line)
        if detector.done:
            break

    detector.close()
    charset = detector.result['encoding']

    # Rewind, sniff the dialect from the start of the file, then rewind
    # again so the reader starts at the header row.
    myfile.seek(0)
    encodedfile = codecs.EncodedFile(myfile, charset)
    dialect = csv.Sniffer().sniff(encodedfile.read(1024))
    myfile.seek(0)

    logger.info('Detected encoding %s and dialect %s for CSV file',
                charset, dialect)

    myreader = UnicodeReader(myfile, dialect=dialect, encoding=charset)

    firstrow = next(myreader)

    # Find name column: the last header containing "name" wins, unless a
    # "display name" style header is met, which ends the search at once.
    namecol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if "name" in header or ugettext("name") in header:
            namecol = colnum

            if "display" in header or ugettext("display") in header:
                break

    if namecol is None:
        raise forms.ValidationError(_(
            "Name column not found. The name of this column should be "
            "either 'name' or '%s'.") % ugettext("name")
        )

    logger.debug("Name column found: '%s'", firstrow[namecol])

    # Find email column: the first matching header wins.
    mailcol = None
    for colnum, column in enumerate(firstrow):
        header = column.lower()
        if 'email' in header or \
                'e-mail' in header or \
                ugettext("e-mail") in header:
            mailcol = colnum
            break

    if mailcol is None:
        raise forms.ValidationError(_(
            "E-mail column not found. The name of this column should be "
            "either 'email', 'e-mail' or '%(email)s'.") %
            {'email': ugettext("e-mail")}
        )

    logger.debug("E-mail column found: '%s'", firstrow[mailcol])

    # A single header matching both keywords cannot serve as both columns.
    if namecol == mailcol:
        raise forms.ValidationError(
            _(
                "Could not properly determine the proper columns in the "
                "CSV-file. There should be a field called 'name' or "
                "'%(name)s' and one called 'e-mail' or '%(e-mail)s'."
            ) % {
                "name": _("name"),
                "e-mail": _("e-mail")
            }
        )

    logger.debug('Extracting data.')

    addresses = {}
    for row in myreader:
        # A row shorter than the header would otherwise raise IndexError.
        if max(namecol, mailcol) >= len(row):
            logger.warning(
                "Column count does not match for row number %d",
                myreader.line_num, extra=dict(data={'row': row}))

            if ignore_errors:
                # Skip this record
                continue

            raise forms.ValidationError(_(
                "Row with content '%(row)s' does not contain a name and "
                "email field.") % {'row': row}
            )

        name = check_name(row[namecol], ignore_errors)
        email = check_email(row[mailcol], ignore_errors)

        logger.debug("Going to add %s <%s>", name, email)

        if not email_re.search(email):
            if not ignore_errors:
                raise forms.ValidationError(_(
                    "Entry '%s' does not contain a valid "
                    "e-mail address.") % name
                )

            logger.warning(
                "Entry '%s' at line %d does not contain a valid "
                "e-mail address.",
                name, myreader.line_num, extra=dict(data={'row': row}))

            # Skip the row; falling through would use an unbound addr or
            # the subscription left over from the previous row.
            continue

        addr = make_subscription(newsletter, email, name)

        if addr:
            if email in addresses:
                logger.warning(
                    "Entry '%s' at line %d contains a "
                    "duplicate entry for '%s'",
                    name, myreader.line_num, email,
                    extra=dict(data={'row': row}))

                if not ignore_errors:
                    raise forms.ValidationError(_(
                        "The address file contains duplicate entries "
                        "for '%s'.") % email)

            addresses[email] = addr
        else:
            # A falsy result is reported as an existing subscription.
            logger.warning(
                "Entry '%s' at line %d is already subscribed to "
                "with email '%s'",
                name, myreader.line_num, email, extra=dict(data={'row': row}))

            if not ignore_errors:
                raise forms.ValidationError(
                    _("Some entries are already subscribed to."))

    return addresses