def parse_schema_from_rows(self, rows):
    """Build the column schema from metadata rows.

    For every row, the first element is taken as the column name, the
    second as the column type and the last as the column comment. The
    keyword is extracted from the comment with ``reg_keyword``; when the
    comment yields none, the column type itself is used as the keyword.
    The command and its arguments are then derived from that keyword.

    :param rows: iterable of indexable rows (name, type, ..., comment)
    :return: list of dicts with the keys ``name``, ``type``, ``comment``,
        ``cmd``, ``ctype`` and ``args``, in row order
    :raises ParseSchemaError: if two rows share the same column name
    :raises EnumMustNotEmptyError: if an ``enum``/``order_enum`` keyword
        carries no values
    """
    schema = []
    # A set gives O(1) duplicate detection instead of scanning a list.
    seen_names = set()
    for row in rows:
        item = {'name': row[0], 'type': row[1], 'comment': row[-1]}
        if item['name'] in seen_names:
            raise ParseSchemaError(
                '%s column has the same name' % item['name'])
        seen_names.add(item['name'])

        keyword = reg_keyword(item['comment'])
        ctype = reg_cmd(item['type'])
        if not keyword:
            # No keyword in the comment: fall back to the declared type.
            keyword = item['type']
        cmd = reg_cmd(keyword) if keyword else ctype
        rets = reg_args(keyword)

        if cmd in ('enum', 'order_enum'):
            if not rets:
                raise EnumMustNotEmptyError
            # An enum with a single value produces a fixed value, unless
            # that value starts with the ENUM_FILE prefix ("file://"
            # according to the original comment), in which case the enum
            # values are read from the referenced file.
            if len(rets) == 1 and rets[0].startswith(ENUM_FILE):
                rets = read_file_lines(rets[0][len(ENUM_FILE):])
            # Convert the enum values to match the declared column type.
            if ctype in INT_TYPES:
                args = [int(ret) for ret in rets]
            elif ctype in FLOAT_TYPES:
                args = [float(ret) for ret in rets]
            else:
                args = rets
        elif cmd in INT_TYPES:
            args = [int(ret) for ret in rets]
            # The trailing boolean flags whether the keyword is unsigned.
            args.append('unsigned' in keyword)
        elif cmd == 'op':
            args = [process_op_args(rets[0], 'columns')]
        else:
            # Best effort: use integer arguments when every value parses
            # as an int, otherwise keep the raw values untouched.
            try:
                args = [int(ret) for ret in rets]
            except (TypeError, ValueError):
                args = rets

        item['cmd'] = cmd
        item['ctype'] = ctype
        item['args'] = args
        schema.append(item)
    return schema
def construct_meta_rows(self):
    """Read the metadata file and split each line into its three fields.

    Every line of the metadata file holds three columns separated by
    ``||``:
      1. the field name
      2. the field type
      3. the field comment, including its marker

    The file path is taken from ``self.args.meta``.

    :return: list of rows, each a list of three whitespace-stripped strings
    :raises ParseSchemaError: if a line does not split into exactly three
        fields
    """
    parsed_rows = []
    for raw_line in read_file_lines(self.args.meta):
        fields = raw_line.split("||")
        # Anything other than exactly three columns is a malformed line.
        if len(fields) != 3:
            raise ParseSchemaError('parse schema error, %s' % raw_line)
        parsed_rows.append([field.strip() for field in fields])
    return parsed_rows
def parse_schema_from_text(self, text):
    """Build a schema from every keyword found in ``text``.

    The keywords are collected with ``reg_all_keywords``; for each one the
    command and its arguments are derived and stored as one schema item.

    :param text: text to scan for keywords
    :return: list of dicts with the keys ``cmd`` and ``args``, in the
        order the keywords were returned
    :raises EnumMustNotEmptyError: if an ``enum``/``order_enum`` keyword
        carries no values
    """
    keywords = reg_all_keywords(text)
    schema = []
    for keyword in keywords:
        cmd = reg_cmd(keyword)
        rets = reg_args(keyword)

        if cmd in ('enum', 'order_enum'):
            if not rets:
                raise EnumMustNotEmptyError
            # An enum with a single value produces a fixed value, unless
            # that value starts with the ENUM_FILE prefix ("file://"
            # according to the original comment), in which case the enum
            # values are read from the referenced file.
            if len(rets) == 1 and rets[0].startswith(ENUM_FILE):
                args = read_file_lines(rets[0][len(ENUM_FILE):])
            else:
                # Enum values are all treated as strings here.
                args = rets
        elif cmd in INT_TYPES:
            args = [int(ret) for ret in rets]
            # The trailing boolean flags whether the keyword is unsigned.
            args.append('unsigned' in keyword)
        elif cmd == 'op':
            args = [process_op_args(rets[0], 'columns')]
        else:
            # Best effort: use integer arguments when every value parses
            # as an int, otherwise keep the raw values untouched.
            try:
                args = [int(ret) for ret in rets]
            except (TypeError, ValueError):
                args = rets

        schema.append({'cmd': cmd, 'args': args})
    return schema
def test_read_file_lines(self):
    """Smoke test: ``read_file_lines`` is called on ``__init__.py``.

    The result is not inspected; the test only fails if the call raises.
    NOTE(review): the path is relative, so this presumably depends on the
    working directory the tests are run from -- confirm.
    """
    read_file_lines("__init__.py")