def insert_general_food(self, field: str, parent_id: str, name: str, note: str = '', ontology: list = None): """ 新增其他领域的食品结点 :param field: 领域名称 :param parent_id: 父节点id :param name: 食品名称 :param note: 备注 :param ontology: 本体id的list :return: 新增结点id if 操作成功 else 错误信息 """ if field not in self.general_foods: raise Exception('新增食品结点失败,领域%s不存在' % field) field_foods = self.general_foods[field] if parent_id not in field_foods: raise Exception('新增食品结点失败,父节点%s不存在' % parent_id) parent_node: GeneralFoodNode = field_foods[parent_id] if parent_node.use_flag is False: raise Exception('新增标准食品失败!父节点已被移除,建议刷新页面以查看最新版本') # 从全局CONFIG中申请新id new_id = CONFIG.generate_new_id(field=field) # 创建新结点 new_node = GeneralFoodNode(node_id=new_id, name=name, parent_id=parent_id, field=field, ontology=ontology, note=note) # 父结点的children中加入它的id parent_node.add_child(new_id) # 加入到general_foods集合中 self.general_foods[field][new_id] = new_node return new_id
def generate_standard_attributes(): def build_attribute_tree(file_path): df = read_excel(file_path) ids = list(df['序号']) names = list(df['属性描述名称']) parents = list(df['上级编码']) codes = list(df['编码']) types = list(df['大类编码']) root = { 'id': 0, 'name': 'root', 'children': [], 'code': '', 'parent_code': '', 'type': '', 'parent_id': -1 } nodes = [root] added_id = set() while len(nodes) > 0: node = nodes[0] added_id.add(node['id']) nodes = nodes[1:] for i in range(len(df)): if parents[i] == node['code']: child = { 'id': ids[i], 'name': names[i], 'children': [], 'parent_code': parents[i], 'parent_id': node['id'], 'code': codes[i], 'type': types[i] } node['children'].append(child) nodes.append(child) not_added_id = [x for x in ids if x not in added_id] print('attributes of these ids are not added to the tree, check it!') print(not_added_id) return root attribute_tree = build_attribute_tree(CONFIG.standard_attributes_excel) save_json_file(food_json_tree_file, attribute_tree) # 根结点单独处理 root_id = CONFIG.generate_new_id('属性') all_data.standard_attributes[root_id] = StandardAttribute( root_id, 'root', '') id_dict = {-1: root_id} # excel中的id到实际系统id的映射 code2id = {} # 其余结点DFS遍历进行插入操作即可 def func(node): parent_id = id_dict[node['parent_id']] new_id = all_data.insert_standard_attribute(parent_id, node['name']) id_dict[node['id']] = new_id code2id[node['code']] = new_id dfs(attribute_tree, func) save_json_file(attribute_code_to_id_file, code2id)
def insert_standard_food(self, parent_id: str, name: str, note: str = ''): """ 新增统一标准中的食品结点 :param parent_id: 其父节点id :param name: 食品名称 :param note: 备注 :return: 新增结点id if 操作成功 else 错误信息 """ if parent_id not in self.standard_foods: # this should not happen without bug raise Exception('新增标准食品失败!父节点%s不存在!' % parent_id) parent_node: StandardFoodNode = self.standard_foods[parent_id] if parent_node.use_flag is False: raise Exception('新增标准食品失败!父节点已被移除,建议刷新页面以查看最新版本') # 从全局CONFIG中申请新id new_id = CONFIG.generate_new_id(field='食品') # 创建新结点 new_node = StandardFoodNode(node_id=new_id, name=name, parent_id=parent_id, note=note) # 父结点的children中加入它的id parent_node.add_child(new_id) # 加入到standard_foods集合中 self.standard_foods[new_id] = new_node return new_id
def insert_standard_attribute(self, parent_id: str, name: str, note: str = ''): """ 插入新的标准属性 :param parent_id: :param name: :param note: :return: """ # tips: 用CONFIG.generate_new_id(field='标准属性') 来自动生成新id,参考insert_standard_food if parent_id not in self.standard_attributes: # this should not happen without bug raise '新增标准属性失败!父节点%s不存在!' % parent_id parent_node: StandardAttribute = self.standard_attributes[parent_id] if parent_node.use_flag is False: return '新增标准属性失败!父节点已被移除,建议刷新页面以查看最新版本' # 从全局CONFIG中申请新id new_id = CONFIG.generate_new_id(field='属性') # 创建新结点 new_node = StandardAttribute(attribute_id=new_id, name=name, parent_id=parent_id, note=note) # 父结点的children中加入它的id parent_node.add_child(new_id) # 加入到standard_attributes集合中 self.standard_attributes[new_id] = new_node return new_id
def generate_standard_foods(): def make_tree_with_synonyms(file_path, ID, NAME, CODE, SYNONYM_NUM, SYNONYM): # 统一标准的食品结点转化为树状json df = read_excel(file_path) cols = list(df.columns) start_index = cols.index(SYNONYM) i = 0 root = { 'id': 0, 'name': 'root', 'children': [], 'parent_id': -1, 'code': 'F', 'synonym_num': 0, 'synonyms': [], 'path': [] } parent_codes = [root['code']] nodes = {root['code']: root} while i < len(df): food_id = df[ID][i].item() name = df[NAME][i] code: str = df[CODE][i] synonym_num = df[SYNONYM_NUM][i].item() node = { 'id': food_id, 'name': name, 'children': [], 'code': code, 'synonym_num': synonym_num, 'synonyms': [] } for j in range(synonym_num): synonym = df[cols[start_index + j]][i] if synonym == '': print('error in synonym of id: %d' % food_id) node['synonyms'].append(synonym) while not code.startswith(parent_codes[-1]): parent_codes.pop() parent_code = parent_codes[-1] parent_node = nodes[parent_code] node['parent_id'] = parent_node['id'] node['path'] = parent_node['path'] + [name] nodes[code] = node parent_node['children'].append(node) parent_codes.append(code) i += 1 return root food_tree = make_tree_with_synonyms(CONFIG.standard_foods_excel, ID='序号', NAME='一级分类名称', CODE='编码', SYNONYM_NUM='同义词数量', SYNONYM='同义词') save_json_file(attribute_json_tree_file, food_tree) # 根结点单独处理 root_id = CONFIG.generate_new_id('食品') all_data.standard_foods[root_id] = StandardFoodNode(root_id, 'root', '') id_dict = {-1: root_id} # excel中的id到实际系统id的映射 # 其余结点DFS遍历进行插入操作即可 code2id = {} def func(node): parent_id = id_dict[node['parent_id']] new_id = all_data.insert_standard_food(parent_id, node['name']) # 更新同义词 synonym_with_notes = dict() for synonym in node['synonyms']: synonym_with_notes[synonym] = '百度百科' all_data.modify_standard_food_synonyms(new_id, synonym_with_notes) id_dict[node['id']] = new_id code2id[node['code']] = new_id dfs(food_tree, func) save_json_file(food_code_to_id_file, code2id)
def generate_general_foods(): field_sheetname = CONFIG.general_filed_sheetname food_code2id = load_json_file(food_code_to_id_file) attribute_code2id = load_json_file(attribute_code_to_id_file) def parse_ontology(s, food_codes_set: set, attribute_codes_set: set, sheet, line, name): # 正则解析各种乱七八糟的编码格式 ontology = s.strip() if ontology == '': return [], [] food_code_pattern = r'F0[A-Z]((([0-9]){2}((\.)?([0-9]){2})*)?)' attribute_code_pattern = r'A[A-Z]((([0-9]){2}((\.)?([0-9]){2})*)?)' food_codes = [ ''.join(x.group().split('.')) for x in re.finditer(food_code_pattern, s) ] attribute_codes = [ ''.join(x.group().split('.')) for x in re.finditer(attribute_code_pattern, s) ] err_position = 'Sheet: %s, Line: %d, Name: %s' % (sheet, line, name) err_flag = False food_code_res = [] attribute_code_res = [] for code in food_codes: if code not in food_codes_set: print('错误! 映射的食品编码不存在: %s' % code) err_flag = True else: food_code_res.append(code) for code in attribute_codes: if code not in attribute_codes_set: print('错误! 映射的属性编码不存在: %s' % code) err_flag = True else: attribute_code_res.append(code) if len(food_codes) == 0: if len(attribute_codes) == 0: if re.search(r'[\u4E00-\u9FA5]+', s) is None: # 非汉字说明 err_flag = True print('错误! 映射的编码格式错误: %s' % s) # else: # print('警告! 仅映射了属性编码,没有食品编码,无法对应本体: %s' % '|'.join(attribute_codes)) if err_flag: print('上述错误出现于:%s\n' % err_position) return food_code_res, attribute_code_res def make_tree_with_ontology(file_path, sheet, food_codes: set, attribute_codes: set, ID='主键', NAME='名称', PARENT_ID='父ID', CODE='食品编码', PATH='路径', ONTOLOGY='分类+属性码'): df = read_excel(file_path, sheet=sheet, skip_row=1) ids = list(df[ID]) names = list(df[NAME]) parents = list(df[PARENT_ID]) codes = list(df[CODE]) paths = list(df[PATH]) ontologys = list(df[ONTOLOGY]) root = { 'id': 0, 'parent_id': -1, 'name': 'root', 'code': '', 'ontology': [], 'attribute': [], 'children': [] } nodes = [root] added_id = set() while len(nodes) > 0: node = nodes[0] added_id.add(node['id']) nodes = nodes[1:] for i in range(len(df)): if parents[i] == node['id']: ontology, attribute = parse_ontology( ontologys[i], food_codes, attribute_codes, sheet, i + 2, names[i]) child = { 'id': ids[i], 'name': names[i], 'children': [], 'parent_id': parents[i], 'code': codes[i], 'path': paths[i], 'ontology': ontology, 'attribute': attribute } node['children'].append(child) nodes.append(child) not_added_id = [x for x in ids if x not in added_id] if len(not_added_id) > 0: print('错误! 在表 %s 中, 这些序号的结点无法被加入分类树中, 请检查它们和它们的父节点是否正确: %s\n\n\n' % (sheet, '|'.join([str(x) for x in not_added_id]))) return root def get_ontology_codes(): def get_codes(file, title='编码'): df = read_excel(file) codes = [''.join(x.strip().split('.')) for x in df[title]] codes_set = set(codes) if '' in codes_set: codes_set.remove('') for code in codes_set: if codes.count(code) > 1: print('error in file %s! %s编码不唯一!' % (file, code)) return codes_set return get_codes(CONFIG.standard_foods_excel), get_codes( CONFIG.standard_attributes_excel) food_code_set, attribute_code_set = get_ontology_codes() for field, sheet in field_sheetname.items(): general_tree = make_tree_with_ontology(CONFIG.general_foods_excel, sheet, food_code_set, attribute_code_set) # 根结点单独处理 root_id = CONFIG.generate_new_id(field) all_data.general_foods[field] = dict() all_data.general_foods[field][root_id] = GeneralFoodNode( root_id, 'root', '', field) id_dict = {-1: root_id} # excel中的id到实际系统id的映射 code2id = {} # 其余结点DFS遍历进行插入操作即可 def func(node): parent_id = id_dict[node['parent_id']] new_id = all_data.insert_general_food(field, parent_id, node['name']) # 根据ontology及attribute更新映射 id_dict[node['id']] = new_id code2id[node['code']] = new_id for standard_code in node['ontology']: standard_food_id = food_code2id[standard_code] attribute_ids = [ attribute_code2id[code] for code in node['attribute'] ] all_data.add_mapping(field, new_id, standard_food_id, attribute_ids) dfs(general_tree, func) save_json_file( os.path.join(CONFIG.json_tree_folder, '%sCode2SystemID.json' % field), code2id) save_json_file( os.path.join(CONFIG.json_tree_folder, '%s.json' % field), general_tree)