def _resource_transform_meta_key_value(self, minidom_meta_element):
    """Convert a ResourceMeta DOM element into a ``(key, value)`` pair.

    Args:
        minidom_meta_element: the meta element; it is read through
            ``getAttribute``, ``attributes`` and the module helper
            ``_extract_text_and_child_element_list``.

    Returns:
        ``('^' + rel, full_obj)`` where ``full_obj`` is a dict assembled from
        the element's attributes and child elements, or ``(None, None)`` when
        the element is skipped: no "rel" attribute, text content present, or
        nothing to store.
    """
    rel = minidom_meta_element.getAttribute('rel')
    # DOM getAttribute returns '' (not None) for an absent attribute, so a
    # falsiness test is needed for this guard to ever trigger.
    if not rel:
        _LOG.debug('"rel" missing from ResourceMeta')
        return None, None
    full_obj = {}
    att_container = minidom_meta_element.attributes
    for i in range(att_container.length):
        attr = att_container.item(i)
        handling_code, new_name = _resource_meta_att_decision_fn(attr.name)
        if handling_code == ATT_TRANSFORM_CODE.IN_FULL_OBJECT:
            full_obj[new_name] = attr.value
        elif handling_code == ATT_TRANSFORM_CODE.IN_XMLNS_OBJ:
            # namespace declarations are grouped under a single '@xmlns' dict
            full_obj.setdefault('@xmlns', {})[new_name] = attr.value
        else:
            assert handling_code == ATT_TRANSFORM_CODE.HANDLED
    rel = '^' + rel
    att_str_val, ntl = _extract_text_and_child_element_list(
        minidom_meta_element)
    if att_str_val:
        # text content inside a ResourceMeta causes the element to be skipped
        _LOG.debug('text content of ResourceMeta of rel="%s"', rel)
        return None, None
    if ntl:
        self._hbf_handle_child_elements(full_obj, ntl)
    if not full_obj:
        _LOG.debug(
            'ResourceMeta of rel="%s" without condents ("href" attribute or nested meta)',
            rel)
        return None, None
    _cull_redundant_about(full_obj)
    return rel, full_obj
def _resource_transform_meta_key_value(self, minidom_meta_element):
    """Convert a ResourceMeta DOM element into a ``(key, value)`` pair.

    Args:
        minidom_meta_element: the meta element; it is read through
            ``getAttribute``, ``attributes`` and the module helper
            ``_extract_text_and_child_element_list``.

    Returns:
        ``('^' + rel, full_obj)`` where ``full_obj`` is a dict assembled from
        the element's attributes and child elements, or ``(None, None)`` when
        the element is skipped: no "rel" attribute, text content present, or
        nothing to store.
    """
    rel = minidom_meta_element.getAttribute('rel')
    # DOM getAttribute returns '' (not None) for an absent attribute, so a
    # falsiness test is needed for this guard to ever trigger.
    if not rel:
        _LOG.debug('"rel" missing from ResourceMeta')
        return None, None
    full_obj = {}
    att_container = minidom_meta_element.attributes
    for i in range(att_container.length):
        attr = att_container.item(i)
        handling_code, new_name = _resource_meta_att_decision_fn(attr.name)
        if handling_code == ATT_TRANSFORM_CODE.IN_FULL_OBJECT:
            full_obj[new_name] = attr.value
        elif handling_code == ATT_TRANSFORM_CODE.IN_XMLNS_OBJ:
            # namespace declarations are grouped under a single '@xmlns' dict
            full_obj.setdefault('@xmlns', {})[new_name] = attr.value
        else:
            assert handling_code == ATT_TRANSFORM_CODE.HANDLED
    rel = '^' + rel
    att_str_val, ntl = _extract_text_and_child_element_list(minidom_meta_element)
    if att_str_val:
        # text content inside a ResourceMeta causes the element to be skipped
        _LOG.debug('text content of ResourceMeta of rel="%s"', rel)
        return None, None
    if ntl:
        self._hbf_handle_child_elements(full_obj, ntl)
    if not full_obj:
        _LOG.debug('ResourceMeta of rel="%s" without condents ("href" attribute or nested meta)', rel)
        return None, None
    _cull_redundant_about(full_obj)
    return rel, full_obj
def _transform_resource_meta(self, res_bf_meta):
    """Turn a resource meta dict into a ``('^' + name, value)`` pair.

    The name is taken from the '@rel' entry.  If '@rel' is absent and
    ``self._workaround_phylografter_rel_bug`` is set, '@property' is used
    instead; otherwise the KeyError propagates to the caller.

    The value is a copy of ``res_bf_meta`` without the keys found in
    ``_SUPPRESSED_RESOURCE``; it is asserted to be non-empty and passed to
    ``_cull_redundant_about`` before being returned.
    """
    try:
        rel_name = res_bf_meta['@rel']
    except KeyError:
        if not self._workaround_phylografter_rel_bug:
            raise
        rel_name = res_bf_meta['@property']
    retained = {
        name: val
        for name, val in res_bf_meta.items()
        if name not in _SUPPRESSED_RESOURCE
    }
    prefixed_key = '^' + rel_name
    assert retained
    _cull_redundant_about(retained)
    return prefixed_key, retained
def _transform_resource_meta(self, res_bf_meta):
    """Turn a resource meta dict into a ``('^' + name, value)`` pair.

    The name is taken from the '@rel' entry.  If '@rel' is absent and
    ``self._workaround_phylografter_rel_bug`` is set, '@property' is used
    instead; otherwise the KeyError propagates to the caller.

    The value is a copy of ``res_bf_meta`` without the keys found in
    ``_SUPPRESSED_RESOURCE``; it is asserted to be non-empty and passed to
    ``_cull_redundant_about`` before being returned.
    """
    try:
        rel_name = res_bf_meta['@rel']
    except KeyError:
        if not self._workaround_phylografter_rel_bug:
            raise
        rel_name = res_bf_meta['@property']
    retained = {
        name: val
        for name, val in res_bf_meta.items()
        if name not in _SUPPRESSED_RESOURCE
    }
    prefixed_key = '^' + rel_name
    assert retained
    _cull_redundant_about(retained)
    return prefixed_key, retained
def _transform_literal_meta(self, lit_bf_meta):
    """Turn a literal meta dict into a ``('^' + property, value)`` pair.

    The '@property' entry is required (KeyError otherwise).  Keys of
    ``lit_bf_meta`` that are not in ``_SUPPRESSED_LITERAL`` are copied into a
    new dict.  If that dict is non-empty it is returned as the value, with
    the content stored under '$' when the content is truthy; otherwise the
    bare content is returned.
    """
    datatype = lit_bf_meta.get('@datatype')
    value = lit_bf_meta.get('$')
    prop_name = lit_bf_meta['@property']
    retained = {
        name: val
        for name, val in lit_bf_meta.items()
        if name not in _SUPPRESSED_LITERAL
    }
    # Coercion should not be needed for json->json, so it only happens when
    # a datatype is given, coercion is enabled and the content is a string.
    if datatype and self._coercing_literals and is_str_type(value):
        value = _coerce_literal_val_to_primitive(datatype, value)
    prefixed_key = '^' + prop_name
    if not retained:
        return prefixed_key, value
    if value:
        retained['$'] = value
    _cull_redundant_about(retained)
    return prefixed_key, retained
def _transform_literal_meta(self, lit_bf_meta):
    """Turn a literal meta dict into a ``('^' + property, value)`` pair.

    The '@property' entry is required (KeyError otherwise).  Keys of
    ``lit_bf_meta`` that are not in ``_SUPPRESSED_LITERAL`` are copied into a
    new dict.  If that dict is non-empty it is returned as the value, with
    the content stored under '$' when the content is truthy; otherwise the
    bare content is returned.
    """
    datatype = lit_bf_meta.get('@datatype')
    value = lit_bf_meta.get('$')
    prop_name = lit_bf_meta['@property']
    retained = {
        name: val
        for name, val in lit_bf_meta.items()
        if name not in _SUPPRESSED_LITERAL
    }
    # Coercion should not be needed for json->json, so it only happens when
    # a datatype is given, coercion is enabled and the content is a string.
    if datatype and self._coercing_literals and is_str_type(value):
        value = _coerce_literal_val_to_primitive(datatype, value)
    prefixed_key = '^' + prop_name
    if not retained:
        return prefixed_key, value
    if value:
        retained['$'] = value
    _cull_redundant_about(retained)
    return prefixed_key, retained
def _literal_transform_meta_key_value(self, minidom_meta_element):
    """Convert a LiteralMeta DOM element into a ``(key, value)`` pair.

    Args:
        minidom_meta_element: the meta element; it is read through
            ``getAttribute``, ``attributes`` and the module helper
            ``_extract_text_and_child_element_list``.

    Returns:
        ``('^' + property, value)``.  ``value`` is the coerced literal when
        no extra attributes or children were collected; otherwise it is a
        dict of those extras with the literal under '$' (only when the
        literal is truthy).  ``(None, None)`` is returned when "property" is
        missing, more than one child element is present, or the coercion
        helper returns None.
    """
    dt = minidom_meta_element.getAttribute('datatype') or 'xsd:string'
    att_str_val = minidom_meta_element.getAttribute('content')
    att_key = minidom_meta_element.getAttribute('property')
    full_obj = {}
    # DOM getAttribute returns '' (not None) for an absent attribute, so a
    # falsiness test is needed for this guard to ever trigger.
    if not att_key:
        _LOG.debug('"property" missing from literal meta')
        return None, None
    att_container = minidom_meta_element.attributes
    for i in range(att_container.length):
        attr = att_container.item(i)
        handling_code, new_name = _literal_meta_att_decision_fn(attr.name)
        if handling_code == ATT_TRANSFORM_CODE.IN_FULL_OBJECT:
            full_obj[new_name] = attr.value
        elif handling_code == ATT_TRANSFORM_CODE.IN_XMLNS_OBJ:
            # namespace declarations are grouped under a single '@xmlns' dict
            full_obj.setdefault('@xmlns', {})[new_name] = attr.value
        else:
            assert handling_code == ATT_TRANSFORM_CODE.HANDLED
    if not att_str_val:
        # With no "content" attribute, fall back to the element's text; child
        # elements are only examined on this path.
        att_str_val, ntl = _extract_text_and_child_element_list(
            minidom_meta_element)
        att_str_val = att_str_val.strip()
        if len(ntl) > 1:
            _LOG.debug(
                'Nested meta elements are not legal for LiteralMeta (offending property="%s")',
                att_key)
            return None, None
        if len(ntl) == 1:
            self._hbf_handle_child_elements(full_obj, ntl)
    att_key = '^' + att_key
    trans_val = _coerce_literal_val_to_primitive(dt, att_str_val)
    if trans_val is None:
        return None, None
    if full_obj:
        # NOTE(review): a falsy coerced value (e.g. 0 or False) is not stored
        # under '$' here -- confirm that this is intended.
        if trans_val:
            full_obj['$'] = trans_val
        _cull_redundant_about(full_obj)
        return att_key, full_obj
    return att_key, trans_val
def _recursive_convert_dict(self, obj):
    """Rewrite the 'meta' entries of ``obj`` in place, then recurse.

    Each element obtained from ``_get_index_list_of_values(obj, 'meta')`` is
    transformed as a resource or literal meta according to its '@xsi:type'.
    The resulting key/value pairs are gathered first and only added to
    ``obj`` after the old 'meta' entry has been removed (removal happens
    only when ``self.remove_old_structs`` is set).  Every dict or list value
    of ``obj`` is then converted recursively.
    """
    _cull_redundant_about(obj)  # rule 10...
    pending = {}
    for meta_el in _get_index_list_of_values(obj, 'meta'):
        xsi_type = meta_el['@xsi:type']
        if _RESOURCE_META_PAT.match(xsi_type):
            transform = self._transform_resource_meta
        else:
            assert _LITERAL_META_PAT.match(xsi_type)
            transform = self._transform_literal_meta
        new_key, new_val = transform(meta_el)
        _add_value_to_dict_bf(pending, new_key, new_val)
    if 'meta' in obj:
        if self.remove_old_structs:
            del obj['meta']
    for new_key, new_val in pending.items():
        _add_value_to_dict_bf(obj, new_key, new_val)
    # Recursion only touches the children, so obj keeps its size while
    # being iterated.
    for child in obj.values():
        if isinstance(child, dict):
            self._recursive_convert_dict(child)
        elif isinstance(child, list):
            self._recursive_convert_list(child)
def _recursive_convert_dict(self, obj):
    """Rewrite the 'meta' entries of ``obj`` in place, then recurse.

    Each element obtained from ``_get_index_list_of_values(obj, 'meta')`` is
    transformed as a resource or literal meta according to its '@xsi:type'.
    The resulting key/value pairs are gathered first and only added to
    ``obj`` after the old 'meta' entry has been removed (removal happens
    only when ``self.remove_old_structs`` is set).  Every dict or list value
    of ``obj`` is then converted recursively.
    """
    _cull_redundant_about(obj)  # rule 10...
    pending = {}
    for meta_el in _get_index_list_of_values(obj, 'meta'):
        xsi_type = meta_el['@xsi:type']
        if _RESOURCE_META_PAT.match(xsi_type):
            transform = self._transform_resource_meta
        else:
            assert _LITERAL_META_PAT.match(xsi_type)
            transform = self._transform_literal_meta
        new_key, new_val = transform(meta_el)
        _add_value_to_dict_bf(pending, new_key, new_val)
    if 'meta' in obj:
        if self.remove_old_structs:
            del obj['meta']
    for new_key, new_val in pending.items():
        _add_value_to_dict_bf(obj, new_key, new_val)
    # Recursion only touches the children, so obj keeps its size while
    # being iterated.
    for child in obj.values():
        if isinstance(child, dict):
            self._recursive_convert_dict(child)
        elif isinstance(child, list):
            self._recursive_convert_list(child)
def _hbf_handle_child_elements(self, obj, ntl):
    """Fold the child XML elements in ``ntl`` into the dict ``obj``.

    Indirect recursion through ``_gen_hbf_el``.

    'meta' children are converted with ``_transform_meta_key_value`` and
    added straight to ``obj`` unless ``self._badgerfish_style_conversion``
    is set.  All other children are grouped by node name, keeping the order
    in which each name was first seen; every group is converted with
    ``_gen_hbf_el`` and stored in ``obj`` as a list.  Returns ``obj``.
    """
    by_tag = {}      # node name -> element(s) carrying that name
    tag_order = []   # node names in order of first appearance
    seen_tags = set()
    for child in ntl:
        tag = child.nodeName
        if tag == 'meta' and (not self._badgerfish_style_conversion):
            meta_key, meta_val = self._transform_meta_key_value(child)
            if meta_key is not None:
                _add_value_to_dict_bf(obj, meta_key, meta_val)
            continue
        if tag not in seen_tags:
            seen_tags.add(tag)
            tag_order.append(tag)
        _add_value_to_dict_bf(by_tag, tag, child)

    # Convert each group of same-named children by recursion and attach the
    # resulting list under the tag reported for the last element converted.
    for tag in tag_order:
        converted = [self._gen_hbf_el(element)
                     for element in _index_list_of_values(by_tag, tag)]
        out_tag = converted[-1][0] if converted else None
        # This assertion trips if the hacky stripping of namespaces results
        # in a name clash among the tags of the children.
        assert out_tag not in obj
        obj[out_tag] = [sub_dict for _, sub_dict in converted]

    # Delete redundant about attributes that are used in XML, but not JSON
    # (last rule of HoneyBadgerFish).
    _cull_redundant_about(obj)
    return obj
def _hbf_handle_child_elements(self, obj, ntl):
    """Fold the child XML elements in ``ntl`` into the dict ``obj``.

    Indirect recursion through ``_gen_hbf_el``.

    'meta' children are converted with ``_transform_meta_key_value`` and
    added straight to ``obj`` unless ``self._badgerfish_style_conversion``
    is set.  All other children are grouped by node name, keeping the order
    in which each name was first seen; every group is converted with
    ``_gen_hbf_el`` and stored in ``obj`` as a list.  Returns ``obj``.
    """
    by_tag = {}      # node name -> element(s) carrying that name
    tag_order = []   # node names in order of first appearance
    seen_tags = set()
    for child in ntl:
        tag = child.nodeName
        if tag == 'meta' and (not self._badgerfish_style_conversion):
            meta_key, meta_val = self._transform_meta_key_value(child)
            if meta_key is not None:
                _add_value_to_dict_bf(obj, meta_key, meta_val)
            continue
        if tag not in seen_tags:
            seen_tags.add(tag)
            tag_order.append(tag)
        _add_value_to_dict_bf(by_tag, tag, child)

    # Convert each group of same-named children by recursion and attach the
    # resulting list under the tag reported for the last element converted.
    for tag in tag_order:
        converted = [self._gen_hbf_el(element)
                     for element in _index_list_of_values(by_tag, tag)]
        out_tag = converted[-1][0] if converted else None
        # This assertion trips if the hacky stripping of namespaces results
        # in a name clash among the tags of the children.
        assert out_tag not in obj
        obj[out_tag] = [sub_dict for _, sub_dict in converted]

    # Delete redundant about attributes that are used in XML, but not JSON
    # (last rule of HoneyBadgerFish).
    _cull_redundant_about(obj)
    return obj
def _literal_transform_meta_key_value(self, minidom_meta_element):
    """Convert a LiteralMeta DOM element into a ``(key, value)`` pair.

    Args:
        minidom_meta_element: the meta element; it is read through
            ``getAttribute``, ``attributes`` and the module helper
            ``_extract_text_and_child_element_list``.

    Returns:
        ``('^' + property, value)``.  ``value`` is the coerced literal when
        no extra attributes or children were collected; otherwise it is a
        dict of those extras with the literal under '$' (only when the
        literal is truthy).  ``(None, None)`` is returned when "property" is
        missing, more than one child element is present, or the coercion
        helper returns None.
    """
    dt = minidom_meta_element.getAttribute('datatype') or 'xsd:string'
    att_str_val = minidom_meta_element.getAttribute('content')
    att_key = minidom_meta_element.getAttribute('property')
    full_obj = {}
    # DOM getAttribute returns '' (not None) for an absent attribute, so a
    # falsiness test is needed for this guard to ever trigger.
    if not att_key:
        _LOG.debug('"property" missing from literal meta')
        return None, None
    att_container = minidom_meta_element.attributes
    for i in range(att_container.length):
        attr = att_container.item(i)
        handling_code, new_name = _literal_meta_att_decision_fn(attr.name)
        if handling_code == ATT_TRANSFORM_CODE.IN_FULL_OBJECT:
            full_obj[new_name] = attr.value
        elif handling_code == ATT_TRANSFORM_CODE.IN_XMLNS_OBJ:
            # namespace declarations are grouped under a single '@xmlns' dict
            full_obj.setdefault('@xmlns', {})[new_name] = attr.value
        else:
            assert handling_code == ATT_TRANSFORM_CODE.HANDLED
    if not att_str_val:
        # With no "content" attribute, fall back to the element's text; child
        # elements are only examined on this path.
        att_str_val, ntl = _extract_text_and_child_element_list(minidom_meta_element)
        att_str_val = att_str_val.strip()
        if len(ntl) > 1:
            _LOG.debug('Nested meta elements are not legal for LiteralMeta (offending property="%s")', att_key)
            return None, None
        if len(ntl) == 1:
            self._hbf_handle_child_elements(full_obj, ntl)
    att_key = '^' + att_key
    trans_val = _coerce_literal_val_to_primitive(dt, att_str_val)
    if trans_val is None:
        return None, None
    if full_obj:
        # NOTE(review): a falsy coerced value (e.g. 0 or False) is not stored
        # under '$' here -- confirm that this is intended.
        if trans_val:
            full_obj['$'] = trans_val
        _cull_redundant_about(full_obj)
        return att_key, full_obj
    return att_key, trans_val