def _get_sheet_data(sh: xlrd.sheet.Sheet, datemode: int) -> pd.DataFrame:
    """Convert one Excel sheet into a Pandas DataFrame.

    Cell (0, 1) carries the maturity date, either as an Excel serial number
    (float) or as a "%d/%m/%Y" string. Row index 1 is the header and data
    starts at row index 2; rows whose second cell is empty are dropped.
    The sheet name is split at its last space into a bond alias and a series.

    Args:
        sh: the sheet to be processed
        datemode: integer forwarded to xlrd.xldate_as_tuple() and
            _get_row_data()

    Returns:
        pd.DataFrame: the sheet's data rows under the header's column names,
        plus MaturityDate, BondCode, BondName and BondSeries columns.
    """
    raw_maturity = sh.cell_value(0, 1)
    if isinstance(raw_maturity, float):
        date_parts = xlrd.xldate_as_tuple(raw_maturity, datemode)
        maturity = datetime.datetime(*date_parts)
    else:
        maturity = datetime.datetime.strptime(raw_maturity, "%d/%m/%Y")

    alias, series = sh.name.rsplit(" ", maxsplit=1)
    # Normalise the alias (no dashes, lower case) before resolving the bond name
    bond = BONDS["aliases"][alias.replace("-", "").lower()]

    header = tuple(cell.value for cell in sh.row(1) if cell.value != "")

    def _records():
        # Skip the two leading rows, then drop rows with an empty second cell
        for row in itertools.islice(sh.get_rows(), 2, None):
            key_cell = row[1]
            if key_cell.ctype == 0 or key_cell.value == "":
                continue
            yield _get_row_data(row, datemode)

    return pd.DataFrame.from_records(_records(), columns=header).assign(
        MaturityDate=maturity,
        BondCode=sh.name,
        BondName=bond,
        BondSeries=series,
    )
def get_document_title(workbook: xlrd.book.Book, orderform_sheet: xlrd.sheet.Sheet) -> str:
    """Return the document title for the order form.

    When the workbook has a sheet named "information", the title is the
    value of the third cell of that sheet's first row. Otherwise it is the
    value of the second cell of the order form sheet's first row.
    """
    if "information" not in workbook.sheet_names():
        return orderform_sheet.row(0)[1].value

    first_row = workbook.sheet_by_name("information").row(0)
    return first_row[2].value
def load_two_first_columns_preincrement(sheet: xlrd.sheet.Sheet):
    """Read a dataset from a sheet: X from the first two columns, y from the last.

    The first row (index 0) is skipped as data; it is only used to determine
    the number of columns, and therefore which column holds the label.

    :param sheet: xlrd.sheet.Sheet
    :return: X, y: float array of shape (sheet.nrows, 2) and integer array of
        shape (sheet.nrows,). Sheet row k (k >= 1) is stored at index k - 1.
    """
    n_rows = sheet.nrows
    X = np.zeros((n_rows, 2))
    # np.int was removed in NumPy 1.24; it was an alias of the builtin int,
    # so dtype=int yields the same array dtype.
    y = np.zeros(n_rows, dtype=int)
    if n_rows == 0:
        # Nothing to read, not even a header row.
        return X, y

    label_column = len(sheet.row(0)) - 1
    # NOTE(review): the arrays have sheet.nrows rows but only nrows - 1 data
    # rows exist, so the final entry of X and y stays zero. The shape is kept
    # as-is for existing callers -- confirm whether that is intended.
    for line_number in range(1, n_rows):
        line = sheet.row(line_number)
        X[line_number - 1, :] = [float(line[i].value) for i in range(2)]
        y[line_number - 1] = int(line[label_column].value)
    return X, y
def get_row_values(sheet: xlrd.sheet.Sheet, rowx: int, datemode: int = DEFAULT_DATEMODE) -> tuple:
    """Return the converted values of one row of a sheet.

    Every cell of row ``rowx`` is passed through Worksheet.cell_to_value()
    together with ``datemode``.

    Returns:
        A tuple with one converted value per cell, in column order. The
        original documentation lists the possible value types as str, bool,
        int, or float.
    """
    return tuple([Worksheet.cell_to_value(cell, datemode) for cell in sheet.row(rowx)])
def dataCheck(sheet1: xlrd.sheet.Sheet):
    """Validate the command rows of a sheet, printing a message per problem.

    Row 0 is never inspected. For every later row, column 1 must be a number
    cell (ctype 2) whose integer part lies in 1..7, and column 2 must satisfy
    the rule attached to that command type.

    Returns:
        True when nothing was reported, False otherwise.
    """
    total_rows = sheet1.nrows
    is_valid = True

    # Row-count check: there must be at least one row after the first
    if total_rows < 2:
        print("没数据啊哥")
        is_valid = False

    # Per-row checks
    for row_idx in range(1, total_rows):
        cells = sheet1.row(row_idx)

        # Column 1: command type
        kind_cell = cells[0]
        kind = kind_cell.value
        if kind_cell.ctype != 2 or not 1 <= int(kind) <= 7:
            print('第', row_idx + 1, "行,第1列数据有毛病")
            is_valid = False

        # Column 2: content, checked according to the command type
        content_cell = cells[1]
        if kind in (1.0, 2.0, 3.0):
            # Image-click commands: content must be a string cell
            content_ok = content_cell.ctype == 1
        elif kind == 4.0:
            # Input command: content must not be empty
            content_ok = content_cell.ctype != 0
        elif kind in (5.0, 6.0):
            # Wait and scroll-wheel commands: content must be a number cell
            content_ok = content_cell.ctype == 2
        else:
            content_ok = True

        if not content_ok:
            print('第', row_idx + 1, "行,第2列数据有毛病")
            is_valid = False

    return is_valid
def parse_head(self, sheet: xlrd.sheet.Sheet):
    """Build the column heads from the first two rows of ``sheet``.

    Row 0 supplies the field names and row 1 the type names. A column is
    skipped when its field name is empty, when its type name is 'null', or
    when FieldTypeMgr.parse_type_by_name() returns FieldType.E_None for it.
    Every other column is appended to self.sheet_heads as a SheetCellHead
    carrying the column index, field name and parsed type.
    """
    name_cells = sheet.row(0)
    type_cells = sheet.row(1)
    type_count = len(type_cells)

    for column, name_cell in enumerate(name_cells):
        field_name = name_cell.value
        # A missing or falsy type cell is treated as an empty type name
        type_name = (type_cells[column].value if column < type_count else '') or ''
        if type_name == 'null' or field_name == '':
            continue

        parsed_type = FieldTypeMgr.parse_type_by_name(type_name)
        if parsed_type == FieldType.E_None:
            continue

        head = SheetCellHead()
        head.set_data(column, field_name, parsed_type)
        self.sheet_heads.append(head)
def parse_rows(self, sheet: xlrd.sheet.Sheet):
    """Parse every row from index 3 onward and collect it in self.sheet_rows.

    Each row gets its own SheetRowData built from self.sheet_heads, which
    then parses the row's cells. The first three rows are not parsed here.
    """
    first_data_row = 3
    row_index = first_data_row
    last_row = sheet.nrows
    while row_index < last_row:
        parsed = SheetRowData(self.sheet_heads)
        parsed.parse_row(sheet.row(row_index))
        self.sheet_rows.append(parsed)
        row_index += 1
def _init_fields(self, sheet: xlrd.sheet.Sheet):
    """Append the value of every cell in the sheet's first row to self.fields."""
    self.fields.extend(cell.value for cell in sheet.row(0))