def _write_charts(self): # delegate to object if len(self._charts) != len(set(self._charts)): raise InvalidFileException("The same chart cannot be used in more than one worksheet") for chart in self._charts: self._archive.writestr(chart.path[1:], tostring(chart._write())) self.manifest.append(chart)
def _validate_archive(filename): """ Does a first check whether filename is a string or a file-like object. If it is a string representing a filename, a check is done for supported formats by checking the given file-extension. If the file-extension is not in SUPPORTED_FORMATS an InvalidFileException will raised. Otherwise the filename (resp. file-like object) will forwarded to zipfile.ZipFile returning a ZipFile-Instance. """ is_file_like = hasattr(filename, 'read') if not is_file_like: file_format = os.path.splitext(filename)[-1].lower() if file_format not in SUPPORTED_FORMATS: if file_format == '.xls': msg = ('openpyxl does not support the old .xls file format, ' 'please use xlrd to read this file, or convert it to ' 'the more recent .xlsx file format.') elif file_format == '.xlsb': msg = ('openpyxl does not support binary format .xlsb, ' 'please convert this file to .xlsx format if you want ' 'to open it with openpyxl') else: msg = ('openpyxl does not support %s file format, ' 'please check you can open ' 'it with Excel first. ' 'Supported formats are: %s') % ( file_format, ','.join(SUPPORTED_FORMATS)) raise InvalidFileException(msg) archive = ZipFile(filename, 'r') return archive
def _validate_archive(filename): """ Check the file is a valid zipfile """ is_file_like = hasattr(filename, 'read') if not is_file_like and os.path.isfile(filename): file_format = os.path.splitext(filename)[-1].lower() if file_format not in SUPPORTED_FORMATS: if file_format == '.xls': msg = ('openpyxl does not support the old .xls file format, ' 'please use xlrd to read this file, or convert it to ' 'the more recent .xlsx file format.') elif file_format == '.xlsb': msg = ('openpyxl does not support binary format .xlsb, ' 'please convert this file to .xlsx format if you want ' 'to open it with openpyxl') else: msg = ('openpyxl does not support %s file format, ' 'please check you can open ' 'it with Excel first. ' 'Supported formats are: %s') % ( file_format, ','.join(SUPPORTED_FORMATS)) raise InvalidFileException(msg) if is_file_like: # fileobject must have been opened with 'rb' flag # it is required by zipfile if getattr(filename, 'encoding', None) is not None: raise IOError("File-object must be opened in binary mode") archive = ZipFile(filename, 'r') return archive
def load_workbook_and_get_emails(path_to_wb: str, sheet_name: str, column_emails: str) -> list: """ This function open a xlsx file and returns all the emails in a list :param path_to_wb: This is the path to the xlsx file. E.g: /home/mendes/workbook.xlsx :param sheet_name: This is the workbook sheet name, which contains the emails data :param column_emails: This is a letter that represent the letter of the column. E.g: A, B, C, D :return: This function returns a list of emails. """ try: sheet = openpyxl.load_workbook(path_to_wb)[sheet_name] except KeyError: raise KeyError("Any sheet found with that title.") except InvalidFileException: raise InvalidFileException("Couldn't open the path expecified.") except SheetTitleException: raise SheetTitleException("Sheet title Misspelled.") # All rows in the xlsx file emails = [] for row_count in range(2, sheet.max_row + 1): try: emails.append(sheet.cell(row=row_count, column=int(alphabet_position(column_emails.lower()))).value) except ValueError: print("Value not found in row: {}".format(row_count)) continue emails = [email for email in emails if email is not None] return emails
def test_bad_xlsx(): """ test bad_xlsx """ error_list = [] error = InvalidFileException("some_error_message") bad_xlsx(error_list, "some_path", error) assert error_list == [{ "explanation": "The file was not recognized as a valid xlsx sheet and could not be" + " loaded.", "affected_paths": [], "raw_or_parse": "RAW", "severity": "CRITICAL", }]
def load_workbook_and_get_emails(path_to_wb: str, sheet_name: str, column_id: str, column_emails: str) -> list: """ This function open a xlsx file and returns all the emails in a list :param path_to_wb: This is the path to the xlsx file. E.g: /home/mendes/workbook.xlsx :param sheet_name: This is the workbook sheet name, which contains the emails data :param column_emails: This is a letter that represent the letter of the column. E.g: A, B, C, D :return: This function returns a list of emails. """ try: sheet = openpyxl.load_workbook(path_to_wb)[sheet_name] except KeyError: raise KeyError("Any sheet found with that title.") except InvalidFileException: raise InvalidFileException("Couldn't open the path expecified.") except SheetTitleException: raise SheetTitleException("Sheet title Misspelled.") # All rows in the xlsx file list_id_with_emails_temp = [] for row_count in range(2, sheet.max_row + 1): try: email_number_conversor_from_letter = alphabet_position( column_emails.lower()) id_number_conversor_from_letter = alphabet_position( column_id.lower()) _id = sheet.cell(row=row_count, column=int(id_number_conversor_from_letter)).value email = sheet.cell( row=row_count, column=int(email_number_conversor_from_letter)).value list_id_with_emails_temp.append((_id, email)) except ValueError: print("Value not found in row: {}".format(row_count)) continue list_id_with_emails = [] for values in list_id_with_emails_temp: if values[0] != None and values[1] != None: list_id_with_emails.append(values) return list_id_with_emails
def load_workbook(filename, read_only=False, use_iterators=False, keep_vba=KEEP_VBA, guess_types=False, data_only=False): """Open the given filename and return the workbook :param filename: the path to open or a file-like object :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile` :param read_only: optimised for reading, content cannot be edited :type read_only: bool :param use_iterators: use lazy load for cells :type use_iterators: bool :param keep_vba: preseve vba content (this does NOT mean you can use it) :type keep_vba: bool :param guess_types: guess cell content type and do not read it from the file :type guess_types: bool :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet :type data_only: bool :rtype: :class:`openpyxl.workbook.Workbook` .. note:: When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet` and the returned workbook will be read-only. """ read_only = read_only or use_iterators is_file_like = hasattr(filename, 'read') if not is_file_like and os.path.isfile(filename): file_format = os.path.splitext(filename)[-1] if file_format not in SUPPORTED_FORMATS: if file_format == '.xls': msg = ('openpyxl does not support the old .xls file format, ' 'please use xlrd to read this file, or convert it to ' 'the more recent .xlsx file format.') elif file_format == '.xlsb': msg = ('openpyxl does not support binary format .xlsb, ' 'please convert this file to .xlsx format if you want ' 'to open it with openpyxl') else: msg = ('openpyxl does not support %s file format, ' 'please check you can open ' 'it with Excel first. ' 'Supported formats are: %s') % ( file_format, ','.join(SUPPORTED_FORMATS)) raise InvalidFileException(msg) if is_file_like: # fileobject must have been opened with 'rb' flag # it is required by zipfile if getattr(filename, 'encoding', None) is not None: raise IOError("File-object must be opened in binary mode") try: archive = ZipFile(filename, 'r', ZIP_DEFLATED) except BadZipfile: f = repair_central_directory(filename, is_file_like) archive = ZipFile(f, 'r', ZIP_DEFLATED) wb = Workbook(guess_types=guess_types, data_only=data_only, read_only=read_only) if read_only and guess_types: warnings.warn('Data types are not guessed when using iterator reader') try: _load_workbook(wb, archive, filename, read_only, keep_vba) except KeyError: e = exc_info()[1] raise InvalidFileException(unicode(e)) archive.close() return wb