def parse_header(self, rows):
    """Build per-column converters from the header, field and type rows.

    Columns are processed left to right until the first empty field name.
    For each accepted column this records the column index in
    ``self.field_2_col``, a ``ConverterInfo`` in ``self.converters`` and a
    ``(col, field, header, type)`` tuple in ``self.sheet_types``.  A column
    whose field name was already seen, or whose type is not a key of
    ``FAST_CONVERTER``, is reported via ``util.log_error`` and keeps
    ``None`` as its converter.  Finally ``self.key_name`` is set to the
    field of column 0.

    Args:
        rows: Indexable collection of worksheet rows; every cell is passed
            through ``self.extract_cell_value``.
    """
    extract = self.extract_cell_value
    header_row = [extract(cell) for cell in rows[self.header_row_index]]
    field_row = [extract(cell) for cell in rows[self.field_row_index]]
    type_row = [extract(cell) for cell in rows[self.type_row_index]]

    for col, field in enumerate(field_row):
        if field == "":
            break

        # Register the column up front so that rejected columns still have
        # an entry (with no converter).
        self.converters[col] = None

        if field in self.field_2_col:
            util.log_error("列名'%s'重复,列:%s", field, util.int_to_base26(col))
            continue
        self.field_2_col[field] = col

        # An empty header falls back to the field name, an empty type to
        # "String".
        header = header_row[col] or field
        type_name = type_row[col] or "String"

        try:
            method = FAST_CONVERTER[type_name.lower()]
        except (KeyError, AttributeError):
            # KeyError: unknown type name.  AttributeError: the type cell
            # held a non-string value without ``lower()``.
            util.log_error("无效的类型'%s',列:%s", type_name, util.int_to_base26(col))
            continue

        self.converters[col] = ConverterInfo((header, field, method, True))
        self.sheet_types[field] = (col, field, header, type_name)

    # NOTE(review): this raises when column 0 was rejected above (its
    # converter is still None) -- confirm callers expect a hard failure.
    self.key_name = self.converters[0].field
    return
def parse_arguments(self, rows):
    """Parse the argument row into ``self.arguments``.

    The row at ``self.argument_row_index`` is read as consecutive
    (name, value) cell pairs.  Scanning stops at the first name that is
    ``None``; names that are not keys of ``xlsconfig.ARGUMENT_CONVERTER``
    are skipped.  For every known name the value is converted with the
    matching ``FAST_CONVERTER`` entry and stored under the configured
    field.  If the conversion raises, the traceback is printed, the
    failure is logged and ``None`` is stored instead.

    Args:
        rows: Indexable collection of worksheet rows; every cell is passed
            through ``self.extract_cell_value``.
    """
    row_index = self.argument_row_index
    arg_row = [self.extract_cell_value(cell) for cell in rows[row_index]]
    cell_count = len(arg_row)

    self.arguments = {}

    for col in xrange(0, cell_count, 2):
        header = arg_row[col]
        if header is None:
            break

        converter = xlsconfig.ARGUMENT_CONVERTER.get(header)
        if converter is None:
            continue

        field, type_name = converter
        method = FAST_CONVERTER[type_name.lower()]

        # A row with an odd number of cells has no value cell after its
        # last name; treat the missing value as None instead of raising
        # IndexError.
        value = arg_row[col + 1] if col + 1 < cell_count else None

        result = None
        try:
            result = method(value)
        except Exception:
            # Best effort: report the bad argument and keep going with None.
            traceback.print_exc()
            log_error("参数转换失败,(%d, %s) = [%s]", row_index + 1, util.int_to_base26(col), value)

        self.arguments[field] = result

    return
def parse_header(self, rows):
    """Build per-column converters by looking headers up in the config.

    Columns are processed left to right until the first empty header.
    A header found in ``self.header_2_config`` yields a ``ConverterInfo``
    whose field is recorded in ``self.field_2_col`` and
    ``self.sheet_types``.  A header that is not configured prints a
    warning and gets ``None`` as its converter.  Either way the column
    receives an entry in ``self.converters``.

    Args:
        rows: Indexable collection of worksheet rows; every cell is passed
            through ``self.extract_cell_value``.
    """
    header_row = [
        self.extract_cell_value(cell)
        for cell in rows[self.header_row_index]
    ]

    for col, header in enumerate(header_row):
        if header == "":
            break

        converter = None
        cfg = self.header_2_config.get(header)

        if cfg is None:
            # Parenthesised single expression: prints the same text under
            # the Python 2 print statement and the print() function.
            print("警告:第(%s)列的表头'%s'没有被解析。%s" % (
                util.int_to_base26(col),
                header,
                self.filename,
            ))
        else:
            converter = ConverterInfo(cfg)
            field = converter.field
            # NOTE(review): a field configured for two headers silently
            # overwrites the earlier column here -- confirm that is intended.
            self.field_2_col[field] = col
            type_name = tp0.type2string(converter.convert)
            self.sheet_types[field] = (col, field, header, type_name)

        self.converters[col] = converter

    return
def write_types_comment(self, sheet_name):
    """Write one comment line per column of ``sheet_name``.

    Reads the ``(col, field, text, type)`` tuples stored under
    ``self.data_module.info["sheet_types"][sheet_name]`` and, ordered by
    ``col``, passes ``"<column name>\\t<field padded to 20><text>"`` to
    ``self.write_comment``.  Nothing is written when there is no data
    module or the sheet has no recorded types.

    Args:
        sheet_name: Key of the sheet inside the ``"sheet_types"`` mapping.
    """
    if not self.data_module:
        return

    sheet_types = self.data_module.info["sheet_types"].get(sheet_name)
    if sheet_types is None:
        return

    # sorted() accepts both a list and a dict view, unlike calling .sort()
    # on the result of values(); both are stable, so the order is the same.
    ordered = sorted(sheet_types.values(), key=lambda v: v[0])

    for col, field, text, _type_name in ordered:
        col_name = "None" if col is None else util.int_to_base26(col)
        self.write_comment("%s\t%-20s%s" % (col_name, field, text))
def do_parse(self):
    """Load the workbook and convert the selected sheet row by row.

    Opens ``self.filename`` with openpyxl, selects the worksheet at
    ``self.sheet_index`` (logging an error and returning if it does not
    exist), then runs ``parse_arguments``, ``parse_header`` and
    ``parse_defaults`` on the sheet's rows.  Every row from
    ``self.data_row_index`` onwards is converted cell by cell with
    ``self.convert_cell`` and handed to ``self.add_row``; processing stops
    at the first row whose first cell is empty.  A cell whose conversion
    raises is reported and skipped, and the rest of the row is still
    processed.
    """
    import openpyxl

    self.workbook = openpyxl.load_workbook(self.filename)

    sheets = self.workbook.worksheets
    if self.sheet_index >= len(sheets):
        log_error("Excel表'%s'没有子表'%d'", self.filename, self.sheet_index)
        return

    table = sheets[self.sheet_index]
    self.worksheet = table

    rows = list(table.rows)

    self.parse_arguments(rows)
    self.parse_header(rows)
    self.parse_defaults(rows)

    row_count = len(rows)
    if self.data_row_index >= row_count:
        return

    ncols = len(self.converters)

    # Everything from data_row_index onwards is raw data.
    for r in xrange(self.data_row_index, row_count):
        cells = rows[r]

        # A blank first cell marks the end of the data.
        first_value = cells[0].value
        if first_value == '' or first_value is None:
            break

        current_row_data = {}
        for c in xrange(ncols):
            value = self.extract_cell_value(cells[c])
            try:
                self.convert_cell(r, c, value, current_row_data)
            except Exception:
                # Best effort: report the bad cell and continue with the
                # rest of the row.  Exception (not a bare except) lets
                # KeyboardInterrupt/SystemExit abort the parse.
                traceback.print_exc()
                util.log_error("单元格(%d, %s) = [%s] 数据解析失败", r + 1, util.int_to_base26(c), value)

        self.add_row(current_row_data)

    return