def get_document_title(workbook: xlrd.book.Book, orderform_sheet: xlrd.sheet.Sheet) -> str: """Get the document title for the order form.""" if "information" in workbook.sheet_names(): information_sheet = workbook.sheet_by_name("information") document_title = information_sheet.row(0)[2].value return document_title document_title = orderform_sheet.row(0)[1].value return document_title
def sheet_of( request, workbook: xlrd.book.Book ) -> Tuple[Tuple[int, int], Callable[[int, int], str]]: if workbook.nsheets != 1: raise FileError().with_message( request, f'excel can only contain 1 sheet, sheets: {workbook.nsheets}') sheet = workbook.sheet_by_index(0) m, n = sheet.nrows, sheet.ncols if m == 0 or n == 0: raise FileError().with_message(request, 'empty excel sheet') def value_of(i, j) -> str: cell = sheet.cell(i, j) if cell.ctype == xlrd.XL_CELL_EMPTY: return '' elif cell.ctype == xlrd.XL_CELL_TEXT: return cell.value elif cell.ctype == xlrd.XL_CELL_NUMBER: return str(cell.value) elif cell.ctype == xlrd.XL_CELL_DATE: date = xlrd.xldate.xldate_as_tuple(cell.value, datemode=0) # datemode return f'{date[0]}-{date[1]}-{date[2]}' elif cell.ctype == xlrd.XL_CELL_BOOLEAN: return str(True if cell.value == 1 else False) else: raise FileError().with_message(request, f'cell type error at ({i},{j})') return (m, n), value_of
def read_worksheet_settings(wks: xlrd.book.Book) -> Dict[str, Any]: sheet = wks.sheet_by_name("Settings") result = {} for cur_row in range(sheet.nrows): key = str(sheet.cell(cur_row, 0).value) if key == "": continue if key == "Formulas": break values = [] for cur_col in range(1, sheet.ncols): cur_value = sheet.cell(cur_row, cur_col).value if str(cur_value) == "": break values.append(cur_value) if len(values) == 1: values = values[0] result[key] = values return result
def _read_xls(self, wb: xlrd.book.Book): data = list() ws = wb.sheet_by_index(0) # type: xlrd.sheet.Sheet row = 7 while ws.cell_value(row, 0) != '': dgu_id = int(ws.cell_value(row, 0)) node_id = int(ws.cell_value(row, 2)) data_row = ws.row_values(row, 4, 124) for i in range(0, 5 * 24, 5): data.append( (dgu_id, i // 5, node_id, data_row[i], data_row[i + 1], data_row[i + 2], data_row[i + 3], data_row[i + 4])) row += 1 data = pd.DataFrame(data, columns=[ 'dgu_id', 'hour', 'node_id', 'p_min_tech', 'p_min_techn', 'p_min', 'v_ppp', 'p_max' ]) data['date'] = pd.to_datetime(self._context[0]) # в какой-то момент int превращаются в float data.node_id = data.node_id.astype(int) data.hour = data.hour.astype(int) data.dgu_id = data.dgu_id.astype(int) self.data.append(data)
def extract_retailer(wb: xlrd.book.Book) -> str: sheet = wb.sheet_by_index(0) for index, cell in enumerate(sheet.col(0)): if 'PURCHASE ORDER TO' in cell.value: cell: xlrd.sheet.Cell = sheet.cell(index + 1, 0) assert ctype_text.get(cell.ctype) == 'text', 'invalid purchase order cell' return cell.value return None
def _read_xls(self, wb: xlrd.book.Book): ws = wb.sheet_by_index(0) data = pd.DataFrame(data=ws._cell_values[7:], columns=['date', 'hour', 'fact']) data['region_id'] = region_map[ws._cell_values[1][1]] data['date'] = pd.to_datetime(data['date']) data['hour'] = data.hour.astype(int) - 1 self.data.append(data)
def extract_po(wb: xlrd.book.Book) -> str: sheet = wb.sheet_by_index(0) # try col 9, 10 for i in range(9, 11): cell: xlrd.sheet.Cell = sheet.cell(2, i) if ctype_text.get(cell.ctype) != 'text': continue po = cell.value if po_re.match(po): return po return None
def _read_xls(self, wb: xlrd.book.Book): ws = wb.sheet_by_index(0) data = pd.DataFrame( data=ws._cell_values[6:], columns=['region1_id', 'region2_id', 'hour', 'flow']) data.replace('', null, inplace=True) data['region1_id'] = data.region1_id.replace(region_map).astype(int) data['region2_id'] = data.region2_id.replace(region_map).astype(int) data['hour'] = data.hour.astype(int) data['date'] = pd.to_datetime(self._context[0]) self.data.append(data)
def _read_xls(self, wb: xlrd.book.Book): ws = wb.sheet_by_index(0) data = pd.DataFrame( data=ws._cell_values[6:], columns=[ 'region_id', 'hour', 'gen_ges', 'gen_aes', 'gen_tes', 'gen_ses', 'gen_ves', 'gen_other', 'pmin_tech_ges', 'pmin_tech_aes', 'pmin_tech_tes', 'pmin_tech_ses', 'pmin_tech_ves', 'pmin_tech_other', 'pmin_ges', 'pmin_aes', 'pmin_tes', 'pmin_ses', 'pmin_ves', 'pmin_other', 'pmax_ges', 'pmax_aes', 'pmax_tes', 'pmax_ses', 'pmax_ves', 'pmax_other', 'con', 'exp', 'imp', 'price_con', 'price_gen' ]) data.replace('', null, inplace=True) data['region_id'] = data.region_id.replace(region_map).astype(int) data['hour'] = data.hour.astype(int) data['date'] = pd.to_datetime(self._context[0]) self.data.append(data)
def read_worksheet_table(wks: xlrd.book.Book) -> Dict[str, Any]: """Read a table of numbers from an Excel file saved by Keithley. This function reads the first worksheet in the Excel file passed as argument and returns a dictionary associating NumPy arrays with their names. """ sheet = wks.sheet_by_index(0) result = OrderedDict() nrows = sheet.nrows for cur_col in range(sheet.ncols): name = sheet.cell(0, cur_col).value if len(name) >= len("START") and name[:5] == "START": # This column and the following are not useful break result[name] = np.array([sheet.cell(i, cur_col).value for i in range(1, nrows)]) return result
def _read_xls(self, wb: xlrd.book.Book): data = list() for ws in wb.sheets(): # type: xlrd.sheet.Sheet hour_data = pd.DataFrame(data=ws._cell_values[3:], columns=[ 'node_id', 'node_name', 'u', 'region_id', 'price', 'empty' ]) hour_data.drop(['node_name', 'u', 'empty'], axis=1, inplace=True) hour_data['hour'] = int(ws.name) hour_data['region_id'] = hour_data.region_id.replace( region_map).astype(int) hour_data['price'] = pd.to_numeric(hour_data.price) data.append(hour_data) data = pd.concat(data) # type: pd.DataFrame data['date'] = pd.to_datetime(self._context[0]) # в какой-то момент int превращаются в float data.node_id = data.node_id.astype(int) data.hour = data.hour.astype(int) self.data.append(data)
def _read_xls(self, wb: xlrd.book.Book): data = list() for ws in wb.sheets(): # type: xlrd.sheet.Sheet hour_data = pd.DataFrame( data={ 'hour': int(ws.name), 'node_from': ws.col_values(0, 5), 'node_to': ws.col_values(1, 5), 'branch_num': ws.col_values(2, 5), 'flow': ws.col_values(3, 5) }) data.append(hour_data) data = pd.concat(data) # type: pd.DataFrame data['date'] = pd.to_datetime(self._context[0]) # в какой-то момент int превращаются в float data.node_from = data.node_from.astype(int) data.node_to = data.node_to.astype(int) data.branch_num = data.branch_num.astype(int) data.hour = data.hour.astype(int) self.data.append(data)
def _read_xls(self, wb: xlrd.book.Book): data = list() for ws in wb.sheets(): # type: xlrd.sheet.Sheet hour_data = pd.DataFrame(data=ws._cell_values[3:], columns=[ 'section_id', 'name', 'node1', 'node2', 'branch_name', 'p_min', 'p_max', 'v_ppp' ]) hour_data.replace(['', ' '], [null, null], inplace=True) t = hour_data.p_min.isnull() & hour_data.p_max.isnull() hour_data.drop(hour_data.index.values[t], inplace=True) hour_data.p_min = hour_data.p_min.astype(str).str.replace( ',', '.').astype('float') hour_data.p_max = hour_data.p_max.astype(str).str.replace( ',', '.').astype('float') hour_data.v_ppp = hour_data.v_ppp.astype(str).str.replace( ',', '.').astype('float') hour_data['is_active'] = ((hour_data.p_min == hour_data.v_ppp) | (hour_data.p_max == hour_data.v_ppp)) * 1 hour_data.drop([ 'name', 'node1', 'node2', 'branch_name', 'p_min', 'p_max', 'v_ppp' ], axis=1, inplace=True) hour_data['hour'] = int(ws.name) data.append(hour_data) data = pd.concat(data) # type: pd.DataFrame data['date'] = pd.to_datetime(self._context[0]) # в какой-то момент int превращаются в float data.section_id = data.section_id.astype(int) data.hour = data.hour.astype(int) data.is_active = data.is_active.astype(int) self.data.append(data)
def _read_xls(self, wb: xlrd.book.Book): data = list() for ws in wb.sheets(): # type: xlrd.sheet.Sheet p_list = ws.col_values(3, 8) if p_list[0] == "*": p_list[0] = 0 hour_data = pd.DataFrame( data={ 'hour': int(ws.name), 'price': p_list, 'volume': ws.col_values(4, 8) }) data.append(hour_data) data = pd.concat(data) # type: pd.DataFrame date, pz = self._context data['date'] = pd.to_datetime(date) data['pz'] = pz # в какой-то момент int превращаются в float data.pz = data.pz.astype(int) data.hour = data.hour.astype(int) self.data.append(data)
def _get_sheet(book: xlrd.book.Book, sheet_name=False) -> pd.DataFrame: """指定したsheet nameのシートをpandasで開く""" sheet = book.sheet_by_name(sheet_name) table = [sheet.row_values(row) for row in range(sheet.nrows)] index = _get_content_start_row_index(table) return pd.DataFrame(table[index + 1:], columns=table[index])