def parse(self, file):
    """Yield the parsed output of every sheet of an xlsx workbook.

    The whole of ``file`` is read and handed to ``pyexcel.iget_book`` as
    xlsx content. Each sheet is converted with ``to_array()`` and delegated
    to ``self.parse_excel_csv_reader``, whose items are yielded in
    sheet-name order.
    """
    workbook = pyexcel.iget_book(file_content=file.read(), file_type="xlsx")
    # A workbook may hold several sheets; process each one in turn.
    for name in workbook.sheet_names():
        rows = workbook[name].to_array()
        yield from self.parse_excel_csv_reader(rows)
def test_get_sheet_from_dict(self):
    """iget_book(adict=...) exposes the data under the custom sheet name."""
    sheet_name = "custom_sheet"
    columns = {"X": [1, 4], "Y": [2, 5], "Z": [3, 6]}
    stream = pe.iget_book(adict=columns, sheet_name=sheet_name)
    actual = list(stream.to_dict()[sheet_name])
    # Column-oriented input is expected back as a header row plus data rows.
    eq_([["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]], actual)
def test_get_sheet_from_records(self):
    """iget_book(records=...) exposes the data under the custom sheet name."""
    sheet_name = "custom_sheet"
    rows_as_dicts = [
        {"X": 1, "Y": 2, "Z": 3},
        {"X": 4, "Y": 5, "Z": 6},
    ]
    stream = pe.iget_book(records=rows_as_dicts, sheet_name=sheet_name)
    actual = list(stream.to_dict()[sheet_name])
    # The record keys are expected back as the header row.
    eq_([["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]], actual)
def test_get_book_from_memory(self):
    """A book streamed from in-memory xls bytes matches the source once loaded into pe.Book."""
    expected = _produce_ordered_dict()
    xls_buffer = pe.save_book_as(bookdict=expected, dest_file_type="xls")
    stream = pe.iget_book(file_type="xls", file_content=xls_buffer.getvalue())
    # The streamed dict must not compare equal to the plain content ...
    assert stream.to_dict() != expected
    # ... but it must once it has been loaded into a regular Book.
    loaded = pe.Book(stream.to_dict())
    eq_(loaded.to_dict(), expected)
def test_look_at_sheet_names_without_incurring_further_memory_cost(self):
    """Listing sheet names of a streamed book leaves the sheet payload a generator."""
    test_file = "test_get_book.xls"
    content = _produce_ordered_dict()
    book = pe.Book(content)
    book.save_as(test_file)
    try:
        book_stream = pe.iget_book(file_name=test_file)
        eq_(book_stream.sheet_names(), ["Sheet1", "Sheet2", "Sheet3"])
        assert isinstance(book_stream["Sheet1"].payload, GeneratorType)
    finally:
        # Remove the fixture even when an assertion fails, so a stale file
        # cannot leak into other tests that reuse the same file name.
        os.unlink(test_file)
def test_look_at_sheet_names_decides_to_read_seond_one(self):
    """A single sheet stream picked from a streamed book can be read via iget_array."""
    test_file = "test_get_book.xls"
    content = _produce_ordered_dict()
    book = pe.Book(content)
    book.save_as(test_file)
    try:
        book_stream = pe.iget_book(file_name=test_file)
        data = pe.iget_array(sheet_stream=book_stream["Sheet1"])
        # The rows must still be lazy until explicitly materialised below.
        assert isinstance(data, GeneratorType)
        eq_(list(data), [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
    finally:
        # Remove the fixture even when an assertion fails, so a stale file
        # cannot leak into other tests that reuse the same file name.
        os.unlink(test_file)
def test_get_book_from_file(self):
    """A book streamed from an xls file matches the source once loaded into pe.Book."""
    test_file = "test_get_book.xls"
    content = _produce_ordered_dict()
    book = pe.Book(content)
    book.save_as(test_file)
    try:
        book_stream = pe.iget_book(file_name=test_file)
        # The streamed dict must not compare equal to the plain content ...
        assert book_stream.to_dict() != content
        # ... but it must once it has been loaded into a regular Book.
        book3 = pe.Book(book_stream.to_dict())
        eq_(book3.to_dict(), content)
    finally:
        # Remove the fixture even when an assertion fails, so a stale file
        # cannot leak into other tests that reuse the same file name.
        os.unlink(test_file)
def _openbook(path):
    """Support function for 'filltables', iterator over an 'Excel' file.

    Sheets are emitted in a fixed order: 'languages', then 'editors', then
    'sources' (each matched case-insensitively by sheet name), and finally
    one sheet per language code found in the 'languages' sheet (looked up
    by the exact code).

    ARGUMENTS:
        - path : the file path
    RETURNS:
        - an iterator of tuples (sheet,key,metadata)
    """
    # We open the file
    fil = pyexcel.iget_book(file_name=path)
    try:
        # We get a dict of sheets: lower-cased name -> actual name
        d_sheets = {n_sheet.lower(): n_sheet for n_sheet in fil.sheets}
        # We get the 'languages' sheet first, remembering every code seen
        # (a dict is used as an insertion-ordered set)
        d_langs = {}
        name = d_sheets.get('languages')
        if name:
            table = _readarray(0, fil.sheets[name].get_internal_array())
            for code, metadata in table.items():
                d_langs[code] = True
                yield ('languages', code, metadata)
        # We get 'editors' next, followed by 'sources'
        for kind in ('editors', 'sources'):
            name = d_sheets.get(kind)
            if name:
                table = _readarray(1, fil.sheets[name].get_internal_array())
                for code, metadata in table.items():
                    yield (kind, code, metadata)
        # We finally get each language sheet
        for code in d_langs:
            sheet = fil.sheets.get(code, None)
            if not sheet:
                continue
            table = _readarray(1, sheet.get_internal_array())
            for name, metadata in table.items():
                yield (code, name, metadata)
    finally:
        # Runs on normal exhaustion, on early close() of the generator and
        # on errors alike, so the resources are always released.
        pyexcel.free_resources()
def read_dict_sheet(xlsx_path: str, sheet_id_or_name=0) -> list:
    """
    Read one sheet of a workbook as a list of dicts keyed by its first row.

    :param xlsx_path: path of the xlsx file
    :param sheet_id_or_name: index (int) or name (str) of the sheet
    :return: one dict per row after the first row; cells missing from a
        short row are filled with None. An empty list is returned when the
        index is out of range, when the locator is neither int nor str, or
        when the sheet has no rows.
    """
    book = p.iget_book(file_name=xlsx_path)
    if isinstance(sheet_id_or_name, int):
        # The locator is a number: an out-of-range index yields an empty
        # result instead of raising (this includes index == sheet count).
        names = book.sheet_names()
        try:
            name = names[sheet_id_or_name]
        except IndexError:
            return []
        sheet = book[name]
    elif isinstance(sheet_id_or_name, str):
        # The locator is text
        sheet = book[sheet_id_or_name]
    else:
        return []
    if sheet is None:
        return []
    # NOTE(review): iget_book returns a streaming book; confirm the sheet
    # object it hands back really exposes `.array`.
    cache = sheet.array
    if len(cache) < 1:
        return []
    keys = cache[0]
    return [
        {key: (row[j] if j < len(row) else None) for j, key in enumerate(keys)}
        for row in cache[1:]
    ]
def test_get_sheet_from_array(self):
    """iget_book(array=...) exposes the same rows under the custom sheet name."""
    sheet_name = "custom_sheet"
    rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
    stream = pe.iget_book(array=rows, sheet_name=sheet_name)
    actual = list(stream.to_dict()[sheet_name])
    eq_(rows, actual)
def test_get_book_from_memory_compatibility(self):
    """Smoke test: iget_book takes in-memory xls bytes via the ``content`` keyword.

    Nothing is asserted; the test passes as long as the call does not raise.
    """
    source = _produce_ordered_dict()
    xls_buffer = pe.save_book_as(bookdict=source, dest_file_type="xls")
    xls_bytes = xls_buffer.getvalue()
    pe.iget_book(content=xls_bytes, file_type="xls")
def test_get_book_from_book_dict(self):
    """iget_book(bookdict=...) round-trips the dictionary through to_dict()."""
    expected = _produce_ordered_dict()
    stream = pe.iget_book(bookdict=expected)
    eq_(stream.to_dict(), expected)