def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) -> Union[str, None]:
    """Write ``cas`` as an XMI document.

    Args:
        sink: Destination handed unchanged to lxml's ``ElementTree.write``.
            When ``None``, the document is rendered in memory and returned.
        cas: The CAS whose feature structures, sofas and views are written.
        pretty_print: Forwarded to lxml's ``ElementTree.write``.

    Returns:
        The UTF-8 decoded document when ``sink`` is ``None``, otherwise ``None``.
    """
    version_attribute = {"{http://www.omg.org/XMI}version": "2.0"}
    root = etree.Element(
        etree.QName(self._nsmap["xmi"], "XMI"),
        nsmap=self._nsmap,
        **version_attribute,
    )

    self._serialize_cas_null(root)

    # Collect every feature structure, including those that were never
    # added to a sofa directly, and emit them ordered by their xmi id.
    all_fs = list(cas._find_all_fs())
    all_fs.sort(key=lambda item: item.xmiID)
    for feature_structure in all_fs:
        self._serialize_feature_structure(cas, root, feature_structure)

    for sofa in cas.sofas:
        self._serialize_sofa(root, sofa)

    for view in cas.views:
        self._serialize_view(root, view)

    tree = etree.ElementTree(root)
    etree.cleanup_namespaces(tree, top_nsmap=self._nsmap)

    if sink is not None:
        tree.write(sink, xml_declaration=True, pretty_print=pretty_print, encoding="UTF-8")
        return None

    # No sink given: render into an in-memory buffer and return it as text.
    buffer = BytesIO()
    tree.write(buffer, xml_declaration=True, pretty_print=pretty_print, encoding="UTF-8")
    return buffer.getvalue().decode("utf-8")
def serialize(
    self,
    sink: Union[IO, str, None],
    cas: Cas,
    pretty_print: bool = True,
    ensure_ascii: bool = False,
    type_system_mode: TypeSystemMode = TypeSystemMode.FULL,
) -> Union[str, None]:
    """Write ``cas`` as a JSON document.

    Args:
        sink: Where to write the document. A falsy value (e.g. ``None``)
            returns the JSON as a string; a ``str`` is treated as a file
            path and written as UTF-8; a text stream is written to
            directly; any other stream is treated as binary and receives
            UTF-8 encoded output.
        cas: The CAS whose views, feature structures and types are written.
        pretty_print: Indent the output by two spaces when true.
        ensure_ascii: Forwarded to ``json.dump`` / ``json.dumps``.
        type_system_mode: Which types to embed: none, only those reachable
            from the serialized feature structures, or the full type system.

    Returns:
        The JSON text when no sink is given, otherwise ``None``.

    Raises:
        ValueError: If ``type_system_mode`` is not a supported mode, or if
            the data contains NaN/Infinity (``allow_nan=False``).
    """
    feature_structures = []
    views = {}
    for view in cas.views:
        views[view.sofa.sofaID] = self._serialize_view(view)
        if view.sofa.sofaArray:
            feature_structures.append(self._serialize_feature_structure(view.sofa.sofaArray))
        feature_structures.append(self._serialize_feature_structure(view.sofa))

    # Find all fs, even the ones that are not directly added to a sofa
    used_types = set()
    for fs in sorted(cas._find_all_fs(include_inlinable_arrays_and_lists=True), key=lambda a: a.xmiID):
        used_types.add(fs.type)
        feature_structures.append(self._serialize_feature_structure(fs))

    data = {}
    if type_system_mode is not TypeSystemMode.NONE:
        if type_system_mode is TypeSystemMode.MINIMAL:
            # Build transitive closure of used types by following parents, features, etc.
            types_to_include = cas.typesystem.transitive_closure(used_types)
        elif type_system_mode is TypeSystemMode.FULL:
            types_to_include = cas.typesystem.get_types()
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on ``types_to_include``.
            raise ValueError(f"Unsupported type system mode: {type_system_mode!r}")

        types = {}
        for type_ in types_to_include:
            if type_.name == TYPE_NAME_DOCUMENT_ANNOTATION:
                continue
            json_type = self._serialize_type(type_)
            types[json_type[NAME_FIELD]] = json_type
        data[TYPES_FIELD] = types

    data[FEATURE_STRUCTURES_FIELD] = feature_structures
    data[VIEWS_FIELD] = views

    dump_options = {
        "sort_keys": False,
        "indent": 2 if pretty_print else None,
        "ensure_ascii": ensure_ascii,
        "allow_nan": False,
    }

    if not sink:
        return json.dumps(data, **dump_options)

    if isinstance(sink, str):
        # The signature allows a path; open it ourselves instead of handing
        # a string to TextIOWrapper, which would fail.
        with open(sink, "w", encoding="utf-8") as target:
            json.dump(data, target, **dump_options)
        return None

    if isinstance(sink, TextIOBase):
        # Caller-owned text stream: write to it and leave it untouched. In
        # particular, never detach it, or the caller's close() would raise.
        json.dump(data, sink, **dump_options)
        return None

    wrapper = TextIOWrapper(sink, encoding="utf-8", write_through=True)
    try:
        json.dump(data, wrapper, **dump_options)
    finally:
        # Detach even on failure so the wrapper does not close the caller's
        # binary stream when it is garbage-collected.
        wrapper.detach()
    return None