def TestInput(data): fdp = atheris.FuzzedDataProvider(data) try: cell.absolute_coordinate(fdp.ConsumeString(20)) cell.cols_from_range(fdp.ConsumeString(20)) cell.column_index_from_string(fdp.ConsumeString(20)) cell.coordinate_from_string(fdp.ConsumeString(20)) cell.coordinate_to_tuple(fdp.ConsumeString(20)) cell.get_column_interval(fdp.ConsumeInt(10),fdp.ConsumeInt(10)) cell.get_column_letter(fdp.ConsumeInt(10)) cell.quote_sheetname(fdp.ConsumeString(20)) cell.range_boundaries(fdp.ConsumeString(20)) cell.range_to_tuple(fdp.ConsumeString(20)) cell.rows_from_range(fdp.ConsumeString(20)) except ValueError as e: error_list = [ "is not a valid coordinate range", "Invalid column index", "is not a valid column name", "is not a valid coordinate or range", "Value must be of the form sheetname!A1:E4" ] expected_error = False for error in error_list: if error in str(e): expected_error = True if not expected_error: raise e except CellCoordinatesException: pass
def create_team_sheet(xlsx_workbook, team_name): blank_team_sheet = xlsx_workbook.get_sheet_by_name('Team Sheet') decision_sheet = xlsx_workbook.get_sheet_by_name('Decision Sheet') # find an opening in the decision sheet for this team's data dest_cell = '%s1' % get_column_letter( decision_sheet.max_column+1) team_sheet = xlsx_workbook.copy_worksheet(blank_team_sheet) team_sheet.title = team_name team_sheet['A1'] = team_name # Define start Range(target_start) in the new Worksheet min_col, min_row, max_col, max_row = range_boundaries(dest_cell) source_range = 'D1:E30' for row, row_cells in enumerate(decision_sheet[source_range], min_row): for column, cell in enumerate(row_cells, min_col): cell_value = cell.value if cell_value is not None: cell_value = cell_value.replace('Team 1', team_name) cell_value = cell_value.replace('Team Sheet', team_name) if cell_value.startswith('=PRODUCT'): frags = cell_value.split(',') frags[1] = frags[1].replace('D', get_column_letter(column-1)) cell_value = ','.join(frags) if cell_value.startswith('=SUM'): cell_value = cell_value.replace('E', get_column_letter(column) ) decision_sheet.cell(row=row, column=column).value = cell_value decision_sheet.cell(row=row, column=column).fill = copy(cell.fill) return team_sheet
def create_xlsx(self, list_rng, path_xlsx): for label in list_rng: base_rng = self.wb.defined_names[label] dests = base_rng.destinations offset_d = 0 offset_r = 0 for title, coord in dests: base_ColRow = np.array(list(range_boundaries(coord))) for defines in self.Range_xl[label]: self.write_xl(defines, base_ColRow, offset_r, offset_d) if 'RNGD' in label: offset_d = int( re.match('RNGD[0-9]*', label).group().replace('RNGD', '')) else: offset_d = 0 if 'RNGR' in label: offset_r = int( re.match('RNGR[0-9]*', label).group().replace('RNGR', '')) else: offset_r = 0 base_ColRow = self.offset_colrow(base_ColRow, offset_r, offset_d) self.wb.save(path_xlsx) self.wb.close()
def set_excel_col_autowidth(worksheet, min_width=10, max_width=50): """Adjust column width automatically in a given Excel worksheet :param worksheet: target worksheet :type worksheet: openpyxl.worksheet.worksheet.Worksheet :param min_width: minimum width of columns :param max_width: maximum width of columns :return: None """ merged_cells_bounds = [range_boundaries(str(c)) for c in worksheet.merged_cells.ranges if c.min_col != c.max_col] for col_idx, col in enumerate(worksheet.columns): col_idx += 1 col_letter = get_column_letter(col_idx) current_width = min_width worksheet.column_dimensions[col_letter].width = min_width for row_idx, row in enumerate(col): row_idx += 1 col_distr = next((b[2] - b[0] + 1 for b in merged_cells_bounds if row_idx in range(b[1], b[3]+1) and col_idx in range(b[0], b[2]+1)), 1) cell = worksheet.cell(row=row_idx, column=col_idx) chars = len(str(cell.value))/col_distr if cell.value is not None else 0 if cell.alignment.wrap_text: # if text is wrapped then get amount of chars from the longest line chars = max([len(l)/col_distr for l in str(cell.value).split('\n')]) # 2.5 - approximate width of filter icon desired_width = min(chars * 1.1 + 2.5, max_width) if desired_width > current_width: current_width = desired_width worksheet.column_dimensions[col_letter].width = current_width
def resolve_ranges(ranges, default_sheet='Sheet1'): sheet = None range_cells = collections.defaultdict(set) for rng in ranges.split(','): # Handle sheets in range. if '!' in rng: sheet_str, rng = rng.split('!') rng_sheet = resolve_sheet(sheet_str) if sheet is not None and sheet != rng_sheet: raise ValueError(f'Got multiple different sheets in ranges: ' f'{sheet}, {rng_sheet}') sheet = rng_sheet min_col, min_row, max_col, max_row = range_boundaries(rng) if (min_col is None or min_row is None or max_col is None or max_row is None): continue # Excel ranges are boundaries inclusive! for row_idx in range(min_row, max_row + 1): row_cells = range_cells[row_idx] for col_idx in range(min_col, max_col + 1): row_cells.add(col_idx) # Now convert the internal structure to a matrix of cell addresses. sheet = default_sheet if sheet is None else sheet sheet_str = sheet + '!' if sheet else '' return sheet, [[ f'{sheet_str}{get_column_letter(col_idx)}{row_idx}' for col_idx in sorted(row_cells) ] for row_idx, row_cells in sorted(range_cells.items())]
def get_merged_cell_value(self, worksheet, cell): cell_value = None merged_cells_range_boundaries = [range_boundaries(x) for x in worksheet.merged_cell_ranges] for boundary in merged_cells_range_boundaries: if cell.col_idx >= boundary[0] and cell.col_idx <= boundary[2] and cell.row >= boundary[1] and cell.row <= boundary[3]: cell_value = worksheet.cell(column=boundary[0], row=boundary[1]).value break return cell_value
def __init__(self, openpyxl_table: openpyxl_Table, parent: Sheet): self.parent = parent self.openpyxl_table = openpyxl_table (self.first_col, self.first_row, _, self.last_row) = range_boundaries(openpyxl_table.ref) self.columns: Dict[str, Column] = {} for openpyxl_column in openpyxl_table.tableColumns: self.columns[openpyxl_column.name] = Column(openpyxl_column, parent=self) super().__init__(children=self.columns)
def write_xl(self, dict_defines, base_ColRow, ofst_range_r, ofst_range_d): """Write data to Excel Parameters ------- dict_defines : dict[str,any] Excel's Defined Names base_ColRow : list[int] Position of the base range ofst_range_r : int value to offset Right ofst_range_d : int value to offset below """ for key in dict_defines.keys(): offset_r = 0 offset_d = 0 #get the positon of Excel's Defined Names my_range = self.wb.defined_names[key] dests = my_range.destinations for title, coord in dests: ws = self.wb[title] ColRow = np.array(list(range_boundaries(coord))) ColRow = self.add_colrow_next_range(base_ColRow, ColRow, ofst_range_r, ofst_range_d) #if Defined Name contains 'LOOP', Repeat by shifting the number of range #DN means shift N below. RN means shift N right if 'LOOPD' in key: offset_d = int( re.match('LOOPD[0-9]*', key).group().replace('LOOPD', '')) if 'LOOPR' in key: offset_r = int( re.match('LOOPR[0-9]*', key).group().replace('LOOPR', '')) loop_ColRow = ColRow.copy() #change the processing depending on Defined Name for loop in dict_defines[key]: if 'CELL' in key: self.write_cell(ws, loop_ColRow, loop) elif 'COL' in key: self.write_list_col(ws, loop_ColRow, loop) elif 'ROW' in key: self.write_list_row(ws, loop_ColRow, loop) elif 'MTR' in key: self.write_list_matrix(ws, loop_ColRow, loop) elif 'IMG' in key: self.add_img(ws, loop_ColRow, loop) loop_ColRow = self.offset_colrow(loop_ColRow, offset_r, offset_d)
def unMergeExcelCell(path): print('Opening workbook {0}'.format(path)) if not os.path.exists(path): print(("Could not find the excel file: " % path)) return #read workbook into memory wb = load_workbook(filename=path, data_only=True) print('Workbook loaded') #setup a couple progress counter(s) i = 0 num_worksheets = len(wb.worksheets) # loop over each worksheet in the workbook for ws in wb.worksheets: #maintain the worksheet counter i += 1 print('Worksheet progress "{0}" (Sheet {1} of {2})'.format( ws.title, i, num_worksheets)) #loop over each range in this worksheet for rng in tqdm.tqdm(ws.merged_cell_ranges[:]): #range_boundaries->Convert a range string into a tuple of boundaries: (min_col, min_row, max_col, max_row) #We'll zip the numbers to their logical names so we end up with a dictionary w/ named values for easier use rangeBounds = dict( zip(['MIN_COL', 'MIN_ROW', 'MAX_COL', 'MAX_ROW'], range_boundaries(rng))) #Capture the value we want to apply to all unmerged cells captured_cell_value = ws.cell(column=rangeBounds.get('MIN_COL'), row=rangeBounds.get('MIN_ROW')).value #Unmerge the range of cells ws.unmerge_cells(rng) #Update the individual cells to the orignal value for row in range(rangeBounds.get('MIN_ROW'), rangeBounds.get('MAX_ROW') + 1): for col in range(rangeBounds.get('MIN_COL'), rangeBounds.get('MAX_COL') + 1): #print("--R=",row," C=",col ) ws.cell(column=col, row=row, value=captured_cell_value) # save the un-merged excel file (origin_file, ext) = os.path.splitext(path) unmerge_excel_file = origin_file + '_unmerged' + ext print('Saving to disk as new file named "{0}"'.format(unmerge_excel_file)) wb.save(unmerge_excel_file) print("Save complete")
def __init__(self, sheet, address): self.sheet = sheet if isinstance(address, tuple): self._coords = address row, col, nrows, ncols = address if nrows and ncols: self.xl = sheet.xl.cells["%s:%s" % ( sheet.xl.rows[row].columns[col].get_address(), sheet.xl.rows[row + nrows - 1].columns[col + ncols - 1].get_address(), )] else: self.xl = None else: self.xl = sheet.xl.cells[address] bounds = range_boundaries(address) self._coords = (bounds[1], bounds[0], bounds[3] - bounds[1] + 1, bounds[2] - bounds[0] + 1)
def _range_dimensions(range_string): (min_col, min_row, max_col, max_row) = uc.range_boundaries(range_string) row_cnt = max_row - min_row + 1 col_cnt = max_col - min_col + 1 return row_cnt, col_cnt
wb = load_workbook(filename='Univ_Search_Results.xlsx') Sheets = wb.sheetnames new_wb = Workbook() #这种新建文件方式会自动生成一个名为Sheet的工作簿。 new_ws = new_wb['Sheet'] tb_head = ['学校代码', '学校名称', '一级学科代码', '一级学科名称', '评选结果'] #加上注释说明下面一行语句做了什么事情。 #用append函数往名为sheet的工作簿添加一行数据,A1取值为‘学校代码’,B1取值为‘学校名称’,C1取值为‘一级学科代码’,D1取值为‘一级学科名称’,E1取值为‘评选成果’ new_ws.append(tb_head) for sheetname in Sheets: ws = wb[sheetname] #加上注释说明下面一行语句做了什么事情。 #此for循环每遍历一次,max_row就被赋值为当前worksheet的最大行数(有效行数) max_row = cellutl.range_boundaries(ws.dimensions)[3] #max row number #加上注释说明下面一行语句做了什么事情。 #将BCD这三列从第三行到最后一行(最大行数),这一区块的数据赋值给tb tb = getExcR.load_workbook_range('B3:D' + str(max_row), ws) print(tb) #加上注释说明下面2行语句做了什么事情。 #以B1和D1的取值各自生成行数为最大行数减二(max_row-2)的两列,以行索引合并之后再与之前生成的td合并 tb = pd.concat([pd.Series([ws['B1'].value]*(max_row-2)), \ pd.Series([ws['D1'].value]*(max_row-2)), tb ], axis=1) #加上注释说明下面2行语句做了什么事情。 #通过行(row)遍历整个工作表填充相应的数据,并且工作表的行index和header都被舍弃 for row in dataframe_to_rows(tb, index=False, header=False): new_ws.append(row) #提供两种存储数据方法,第一种:存成csv文件,编码选择GB2312 #AllData.columns = tb_head
def range_(): r = yield range_t if '!' in r.value: return Range(*range_to_tuple(r.value)) else: return Range(None, range_boundaries(r.value))
def check_i_chart(excel_file_path: str, issues: Dict[str, List[str]]): check_ranges: List[Dict[Tuple[int, int], str]] = [ HEADER_VALUES, STATS_DATA_VALUES, DONT_TOUCH_VALUE ] wb = openpyxl.load_workbook(excel_file_path) ws = wb.worksheets[WORKSHEET_INDEX] excel_file_path = os.path.basename(excel_file_path) # check text values in first row of data worksheet # check 'stats data' formulas in second row of data worksheet for cell_addresses_and_expected_values in check_ranges: for cell_address, expectation in cell_addresses_and_expected_values.items( ): try: if ws.cell(*cell_address).value != expectation: issues[excel_file_path].append( '%s: %s should be %s' % (excel_file_path, ws.cell(*cell_address).coordinate, expectation)) except AttributeError: pass # TODO can't check value of merged cell # check number of tables on data worksheet if len(ws._tables) > 1: issues[excel_file_path].append('%s: %s tables found, 1 expected' % (excel_file_path, len(ws._tables))) data_table: openpyxl.worksheet.table.Table = None # check table name try: data_table = [ table for table in ws._tables if table.name == DATA_TABLE_CONFIG['name'] ][0] except IndexError: issues[excel_file_path].append( '%s: tables named %s expected, none found. Could not validate LCS entries' % (excel_file_path, DATA_TABLE_CONFIG['name'])) return # check number of cols if len(data_table.tableColumns) is not DATA_TABLE_CONFIG['num_cols']: issues[excel_file_path].append( '%s: %s columns expected, %s found' % (excel_file_path, DATA_TABLE_CONFIG['num_cols'], len(data_table.tableColumns))) # build list of iterators covering data in data_table data_table_range = range_boundaries(data_table.ref) data_table_range = { 'min_col': None, 'min_row': max(data_table_range[1], DATA_TABLE_CONFIG['data_min_row']), 'max_col': None, 'max_row': data_table_range[3], } data_table_iters = {} for col in itertools.chain(DATA_TABLE_COLS_WITH_USER_INPUT.keys(), DATA_TABLE_COLS_WITH_FORMULAS): data_table_range['min_col'] = data_table_range['max_col'] = col data_table_iters[col] = ws.iter_rows(**data_table_range) # check each values in each column for col_id, data_iter in data_table_iters.items(): if col_id in DATA_TABLE_COLS_WITH_FORMULAS.keys(): expected_formula = DATA_TABLE_COLS_WITH_FORMULAS[col_id] for cell in gen_cell(data_iter): if cell.value is expected_formula: pass else: issues[excel_file_path].append( '%s: invalid formula in cell %s' % (excel_file_path, cell.coordinate)) if col_id in DATA_TABLE_COLS_WITH_USER_INPUT.keys(): for cell in gen_cell(data_iter): expected_types = DATA_TABLE_COLS_WITH_USER_INPUT[col_id] if isinstance(cell.value, expected_types): pass else: issues[excel_file_path].append( '%s: %s expected in cell %s, %s found' % (excel_file_path, expected_types, cell.coordinate, cell.value))