Exemple #1
0
 def _write_charts(self):
     # delegate to object
     if len(self._charts) != len(set(self._charts)):
         raise InvalidFileException("The same chart cannot be used in more than one worksheet")
     for chart in self._charts:
         self._archive.writestr(chart.path[1:], tostring(chart._write()))
         self.manifest.append(chart)
Exemple #2
0
def _validate_archive(filename):
    """
    Does a first check whether filename is a string or a file-like
    object. If it is a string representing a filename, a check is done
    for supported formats by checking the given file-extension. If the
    file-extension is not in SUPPORTED_FORMATS an InvalidFileException
    will raised. Otherwise the filename (resp. file-like object) will
    forwarded to zipfile.ZipFile returning a ZipFile-Instance.
    """
    is_file_like = hasattr(filename, 'read')
    if not is_file_like:
        file_format = os.path.splitext(filename)[-1].lower()
        if file_format not in SUPPORTED_FORMATS:
            if file_format == '.xls':
                msg = ('openpyxl does not support the old .xls file format, '
                       'please use xlrd to read this file, or convert it to '
                       'the more recent .xlsx file format.')
            elif file_format == '.xlsb':
                msg = ('openpyxl does not support binary format .xlsb, '
                       'please convert this file to .xlsx format if you want '
                       'to open it with openpyxl')
            else:
                msg = ('openpyxl does not support %s file format, '
                       'please check you can open '
                       'it with Excel first. '
                       'Supported formats are: %s') % (
                           file_format, ','.join(SUPPORTED_FORMATS))
            raise InvalidFileException(msg)

    archive = ZipFile(filename, 'r')
    return archive
Exemple #3
0
def _validate_archive(filename):
    """
    Check the file is a valid zipfile
    """
    is_file_like = hasattr(filename, 'read')

    if not is_file_like and os.path.isfile(filename):
        file_format = os.path.splitext(filename)[-1].lower()
        if file_format not in SUPPORTED_FORMATS:
            if file_format == '.xls':
                msg = ('openpyxl does not support the old .xls file format, '
                       'please use xlrd to read this file, or convert it to '
                       'the more recent .xlsx file format.')
            elif file_format == '.xlsb':
                msg = ('openpyxl does not support binary format .xlsb, '
                       'please convert this file to .xlsx format if you want '
                       'to open it with openpyxl')
            else:
                msg = ('openpyxl does not support %s file format, '
                       'please check you can open '
                       'it with Excel first. '
                       'Supported formats are: %s') % (
                           file_format, ','.join(SUPPORTED_FORMATS))
            raise InvalidFileException(msg)

    if is_file_like:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if getattr(filename, 'encoding', None) is not None:
            raise IOError("File-object must be opened in binary mode")

    archive = ZipFile(filename, 'r')
    return archive
def load_workbook_and_get_emails(path_to_wb: str, sheet_name: str, column_emails: str) -> list:
    """
        This function open a xlsx file and returns all the emails in a list

    :param path_to_wb: This is the path to the xlsx file. E.g: /home/mendes/workbook.xlsx
    :param sheet_name: This is the workbook sheet name, which contains the emails data
    :param column_emails: This is a letter that represent the letter of the column. E.g: A, B, C, D
    :return: This function returns a list of emails.
    """

    try:
        sheet = openpyxl.load_workbook(path_to_wb)[sheet_name]
    except KeyError:
        raise KeyError("Any sheet found with that title.")
    except InvalidFileException:
        raise InvalidFileException("Couldn't open the path expecified.")
    except SheetTitleException:
        raise SheetTitleException("Sheet title Misspelled.")

    # All rows in the xlsx file
    emails = []
    for row_count in range(2, sheet.max_row + 1):
        try:
            emails.append(sheet.cell(row=row_count, column=int(alphabet_position(column_emails.lower()))).value)
        except ValueError:
            print("Value not found in row: {}".format(row_count))
            continue

    emails = [email for email in emails if email is not None]

    return emails
def test_bad_xlsx():
    """
    test bad_xlsx
    """
    error_list = []
    error = InvalidFileException("some_error_message")
    bad_xlsx(error_list, "some_path", error)
    assert error_list == [{
        "explanation":
        "The file was not recognized as a valid xlsx sheet and could not be" +
        " loaded.",
        "affected_paths": [],
        "raw_or_parse":
        "RAW",
        "severity":
        "CRITICAL",
    }]
Exemple #6
0
def load_workbook_and_get_emails(path_to_wb: str, sheet_name: str,
                                 column_id: str, column_emails: str) -> list:
    """
        This function open a xlsx file and returns all the emails in a list

    :param path_to_wb: This is the path to the xlsx file. E.g: /home/mendes/workbook.xlsx
    :param sheet_name: This is the workbook sheet name, which contains the emails data
    :param column_emails: This is a letter that represent the letter of the column. E.g: A, B, C, D
    :return: This function returns a list of emails.
    """

    try:
        sheet = openpyxl.load_workbook(path_to_wb)[sheet_name]
    except KeyError:
        raise KeyError("Any sheet found with that title.")
    except InvalidFileException:
        raise InvalidFileException("Couldn't open the path expecified.")
    except SheetTitleException:
        raise SheetTitleException("Sheet title Misspelled.")

    # All rows in the xlsx file
    list_id_with_emails_temp = []
    for row_count in range(2, sheet.max_row + 1):
        try:
            email_number_conversor_from_letter = alphabet_position(
                column_emails.lower())
            id_number_conversor_from_letter = alphabet_position(
                column_id.lower())
            _id = sheet.cell(row=row_count,
                             column=int(id_number_conversor_from_letter)).value
            email = sheet.cell(
                row=row_count,
                column=int(email_number_conversor_from_letter)).value
            list_id_with_emails_temp.append((_id, email))
        except ValueError:
            print("Value not found in row: {}".format(row_count))
            continue
    list_id_with_emails = []
    for values in list_id_with_emails_temp:
        if values[0] != None and values[1] != None:
            list_id_with_emails.append(values)

    return list_id_with_emails
Exemple #7
0
def load_workbook(filename,
                  read_only=False,
                  use_iterators=False,
                  keep_vba=KEEP_VBA,
                  guess_types=False,
                  data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    read_only = read_only or use_iterators

    is_file_like = hasattr(filename, 'read')

    if not is_file_like and os.path.isfile(filename):
        file_format = os.path.splitext(filename)[-1]
        if file_format not in SUPPORTED_FORMATS:
            if file_format == '.xls':
                msg = ('openpyxl does not support the old .xls file format, '
                       'please use xlrd to read this file, or convert it to '
                       'the more recent .xlsx file format.')
            elif file_format == '.xlsb':
                msg = ('openpyxl does not support binary format .xlsb, '
                       'please convert this file to .xlsx format if you want '
                       'to open it with openpyxl')
            else:
                msg = ('openpyxl does not support %s file format, '
                       'please check you can open '
                       'it with Excel first. '
                       'Supported formats are: %s') % (
                           file_format, ','.join(SUPPORTED_FORMATS))
            raise InvalidFileException(msg)

    if is_file_like:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if getattr(filename, 'encoding', None) is not None:
            raise IOError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        f = repair_central_directory(filename, is_file_like)
        archive = ZipFile(f, 'r', ZIP_DEFLATED)

    wb = Workbook(guess_types=guess_types,
                  data_only=data_only,
                  read_only=read_only)

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    try:
        _load_workbook(wb, archive, filename, read_only, keep_vba)
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    archive.close()
    return wb