Python TypeSystem.get_typeの例

プログラミング言語: Python

名前空間/パッケージ名: cassis.typesystem

クラス/型: TypeSystem

メソッド/関数: get_type

hotexamples.comのコード掲載数: 11

Python TypeSystem.get_type - 11件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのcassis.typesystem.TypeSystem.get_typeの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

get_type(11)

TypeSystem(5)

is_primitive(4)

is_collection(3)

is_primitive_collection(3)

is_instance_of(2)

is_primitive_array(2)

is_primitive_list(2)

コード例 #1

ファイルを表示

ファイル: xmi.py プロジェクト: hatzel/dkpro-cassis

    def _parse_feature_structure(self, typesystem: TypeSystem, elem,
                                 children: Dict[str, List[str]]):
        # Strip the http prefix, replace / with ., remove the ecore part
        # TODO: Error checking
        typename = elem.tag[9:].replace("/", ".").replace("ecore}", "").strip()

        AnnotationType = typesystem.get_type(typename)
        attributes = dict(elem.attrib)
        attributes.update(children)

        # Map the xmi:id attribute to xmiID
        attributes["xmiID"] = int(attributes.pop("{http://www.omg.org/XMI}id"))

        if "begin" in attributes:
            attributes["begin"] = int(attributes["begin"])

        if "end" in attributes:
            attributes["end"] = int(attributes["end"])

        if "sofa" in attributes:
            attributes["sofa"] = int(attributes["sofa"])

        # Remap features that use a reserved Python name
        if "self" in attributes:
            attributes["self_"] = attributes.pop("self")

        if "type" in attributes:
            attributes["type_"] = attributes.pop("type")

        self._max_xmi_id = max(attributes["xmiID"], self._max_xmi_id)
        return AnnotationType(**attributes)

コード例 #2

ファイルを表示

    def _parse_annotation(self, typesystem: TypeSystem, elem):
        # Strip the http prefix, replace / with ., remove the ecore part
        # TODO: Error checking

        typename = elem.tag
        if typename.startswith("{"):
            parts = re.findall("(?:/|})([^}/.]+)", typename)
            typename = ".".join(parts)

        AnnotationType = typesystem.get_type(typename)
        attributes = dict(elem.attrib)

        # Map the xmi:id attribute to xmiID
        attributes["xmiID"] = int(attributes.pop("{http://www.omg.org/XMI}id"))

        if "begin" in attributes:
            attributes["begin"] = int(attributes["begin"])

        if "end" in attributes:
            attributes["end"] = int(attributes["end"])

        if "sofa" in attributes:
            attributes["sofa"] = int(attributes["sofa"])

        return AnnotationType(**attributes)

コード例 #3

ファイルを表示

    def _parse_sofa(self, typesystem: TypeSystem, elem) -> Sofa:
        """Create a ``Sofa`` from the attributes of an XMI sofa element.

        The xmi:id attribute is stored as ``xmiID``, ``sofaNum`` is converted
        to an int and ``type`` is set to the type registered under
        ``TYPE_NAME_SOFA``. ``self._max_xmi_id`` and ``self._max_sofa_num`` are
        raised to the values seen here when those are larger.
        """
        sofa_kwargs = dict(elem.attrib)

        xmi_id = int(sofa_kwargs.pop("{http://www.omg.org/XMI}id"))
        sofa_kwargs["xmiID"] = xmi_id

        sofa_num = int(sofa_kwargs["sofaNum"])
        sofa_kwargs["sofaNum"] = sofa_num

        sofa_kwargs["type"] = typesystem.get_type(TYPE_NAME_SOFA)

        # Track the highest ids seen so far
        self._max_xmi_id = max(xmi_id, self._max_xmi_id)
        self._max_sofa_num = max(sofa_num, self._max_sofa_num)

        return Sofa(**sofa_kwargs)

コード例 #4

ファイルを表示

ファイル: xmi.py プロジェクト: GillesJ/dkpro-cassis

    def _parse_annotation(self, typesystem: TypeSystem, elem):
        # Strip the http prefix, replace / with ., remove the ecore part
        # TODO: Error checking
        typename = elem.tag[9:].replace('/', '.').replace('ecore}', '')

        AnnotationType = typesystem.get_type(typename)
        attributes = dict(elem.attrib)

        # Map the xmi:id attribute to xmiID
        attributes['xmiID'] = int(attributes.pop('{http://www.omg.org/XMI}id'))
        attributes['begin'] = int(attributes['begin'])
        attributes['end'] = int(attributes['end'])
        attributes['sofa'] = int(attributes['sofa'])

        return AnnotationType(**attributes)

コード例 #5

ファイルを表示

    def _parse_feature_structure(self, typesystem: TypeSystem, elem, children: Dict[str, List[str]]):
        # Strip the http prefix, replace / with ., remove the ecore part
        # TODO: Error checking
        type_name: str = elem.tag[9:].replace("/", ".").replace("ecore}", "").strip()

        if type_name.startswith("uima.noNamespace."):
            type_name = type_name[17:]

        AnnotationType = typesystem.get_type(type_name)
        attributes = dict(elem.attrib)
        attributes.update(children)

        # Map the xmi:id attribute to xmiID
        attributes["xmiID"] = int(attributes.pop("{http://www.omg.org/XMI}id"))

        if "begin" in attributes:
            attributes["begin"] = int(attributes["begin"])

        if "end" in attributes:
            attributes["end"] = int(attributes["end"])

        if "sofa" in attributes:
            attributes["sofa"] = int(attributes["sofa"])

        # Remap features that use a reserved Python name
        if "self" in attributes:
            attributes["self_"] = attributes.pop("self")

        if "type" in attributes:
            attributes["type_"] = attributes.pop("type")

        # Arrays which were represented as nested elements in the XMI have so far have only been parsed into a Python
        # arrays. Now we convert them to proper UIMA arrays/lists
        if not typesystem.is_primitive_array(type_name):
            for feature_name, feature_value in children.items():
                feature = AnnotationType.get_feature(feature_name)
                if typesystem.is_primitive_array(feature.rangeType):
                    ArrayType = feature.rangeType
                    attributes[feature_name] = ArrayType(elements=attributes[feature_name])
                if typesystem.is_primitive_list(feature.rangeType):
                    attributes[feature_name] = self._parse_primitive_list(feature.rangeType, attributes[feature_name])

        self._max_xmi_id = max(attributes["xmiID"], self._max_xmi_id)
        return AnnotationType(**attributes)

コード例 #6

ファイルを表示

    def _parse_annotation(self, typesystem: TypeSystem, elem):
        # Strip the http prefix, replace / with ., remove the ecore part
        # TODO: Error checking
        typename = elem.tag[9:].replace("/", ".").replace("ecore}", "")

        AnnotationType = typesystem.get_type(typename)
        attributes = dict(elem.attrib)

        # Map the xmi:id attribute to xmiID
        attributes["xmiID"] = int(attributes.pop("{http://www.omg.org/XMI}id"))

        if "begin" in attributes:
            attributes["begin"] = int(attributes["begin"])

        if "end" in attributes:
            attributes["end"] = int(attributes["end"])

        if "sofa" in attributes:
            attributes["sofa"] = int(attributes["sofa"])

        return AnnotationType(**attributes)

コード例 #7

ファイルを表示

ファイル: xmi.py プロジェクト: hatzel/dkpro-cassis

    def deserialize(self, source: Union[IO, str], typesystem: TypeSystem,
                    lenient: bool, trusted: bool):
        """Read an XMI document from ``source`` and build a ``Cas`` from it.

        The document is parsed incrementally. Sofas, views and feature
        structures are collected first; afterwards feature values that refer
        to other feature structures are resolved, and finally the views of the
        resulting ``Cas`` are populated.

        Args:
            source: Passed to ``etree.iterparse``.
            typesystem: Used to look up types and to classify feature ranges.
            lenient: If true, an element whose type raises
                ``TypeNotFoundError`` is skipped with a warning instead of
                aborting. Also forwarded to ``Cas``.
            trusted: Forwarded to ``etree.iterparse`` as ``huge_tree``.

        Returns:
            The populated ``Cas``.

        Raises:
            TypeNotFoundError: If a type is unknown and ``lenient`` is false.
            RuntimeError: On an unexpected parser event or state transition.
        """
        # namespaces
        NS_XMI = "{http://www.omg.org/XMI}"
        NS_CAS = "{http:///uima/cas.ecore}"

        TAG_XMI = NS_XMI + "XMI"
        TAG_CAS_SOFA = NS_CAS + "Sofa"
        TAG_CAS_VIEW = NS_CAS + "View"

        # States of the state machine described further below
        OUTSIDE_FS = 1
        INSIDE_FS = 2
        INSIDE_ARRAY = 3

        sofas = {}  # keyed by the sofa's xmiID
        views = {}  # keyed by the proto view's ``sofa`` attribute
        feature_structures = {}  # keyed by xmiID
        children = defaultdict(list)  # child-element values of the feature structure being parsed
        lenient_ids = set()  # xmi ids of elements skipped because their type is unknown

        context = etree.iterparse(source,
                                  events=("start", "end"),
                                  huge_tree=trusted)

        state = OUTSIDE_FS
        self._max_xmi_id = 0
        self._max_sofa_num = 0

        for event, elem in context:
            # Ignore the 'xmi:XMI'
            if elem.tag == TAG_XMI:
                pass
            elif elem.tag == TAG_CAS_SOFA:
                if event == "end":
                    sofa = self._parse_sofa(elem)
                    sofas[sofa.xmiID] = sofa
            elif elem.tag == TAG_CAS_VIEW:
                if event == "end":
                    proto_view = self._parse_view(elem)
                    views[proto_view.sofa] = proto_view
            else:
                """
                In XMI, array element features can be encoded as
                
                <cas:StringArray>
                    <elements>LNC</elements>
                    <elements>MTH</elements>
                    <elements>SNOMEDCT_US</elements>
                </cas:StringArray>
                
                In order to parse this with an incremental XML parser, we need to employ 
                a simple state machine. It is depicted in the following.
                            
                                   "start"               "start"
                     +-----------+-------->+-----------+-------->+--------+
                     | Outside   |         | Inside    |         | Inside |
                +--->+ feature   |         | feature   |         | array  |
                     | structure |         | structure |         | element|
                     +-----------+<--------+-----------+<--------+--------+
                                    "end"                 "end"                                
                """
                if event == "start":
                    if state == OUTSIDE_FS:
                        # We saw the opening tag of a new feature structure
                        state = INSIDE_FS
                    elif state == INSIDE_FS:
                        # We saw the opening tag of an array element
                        state = INSIDE_ARRAY
                    else:
                        raise RuntimeError(
                            "Invalid state transition: [{0}] 'start'".format(
                                state))
                elif event == "end":
                    if state == INSIDE_FS:
                        # We saw the closing tag of a new feature
                        state = OUTSIDE_FS

                        # If a type was not found, ignore it if lenient, else raise an exception
                        try:
                            fs = self._parse_feature_structure(
                                typesystem, elem, children)
                            feature_structures[fs.xmiID] = fs
                        except TypeNotFoundError as e:
                            if not lenient:
                                raise e

                            warnings.warn(e.message)
                            # Remember the id so that view members pointing to it can be skipped later
                            xmiID = elem.attrib.get(
                                "{http://www.omg.org/XMI}id", None)
                            if xmiID:
                                lenient_ids.add(int(xmiID))

                        # The collected child values belong to the element just closed
                        children.clear()
                    elif state == INSIDE_ARRAY:
                        # We saw the closing tag of an array element
                        children[elem.tag].append(elem.text)
                        state = INSIDE_FS
                    else:
                        raise RuntimeError(
                            "Invalid state transition: [{0}] 'end'".format(
                                state))
                else:
                    raise RuntimeError(
                        "Invalid XML event: [{0}]".format(event))

            # Free already processed elements from memory
            if event == "end":
                self._clear_elem(elem)

        # Post-process feature values
        # NOTE(review): referenced_fs is filled below but not read anywhere in this method
        referenced_fs = set()
        for xmi_id, fs in feature_structures.items():
            t = typesystem.get_type(fs.type)

            for feature in t.all_features:
                feature_name = feature.name

                if feature_name == "sofa":
                    # Replace the sofa's xmi id by the sofa object itself
                    value = getattr(fs, feature_name)
                    sofa = sofas[value]
                    setattr(fs, feature_name, sofa)
                    continue

                if (typesystem.is_primitive(feature.rangeTypeName)
                        or typesystem.is_primitive_collection(
                            feature.rangeTypeName)
                        or typesystem.is_primitive_collection(fs.type)):
                    # TODO: Parse feature values to their real type here, e.g. parse ints or floats
                    continue

                # Resolve references here
                value = getattr(fs, feature_name)
                if value is None:
                    continue

                # Resolve references
                if typesystem.is_collection(fs.type, feature):
                    # A collection of references is a list of integers separated
                    # by single spaces, e.g. <foo:bar elements="1 2 3 42" />
                    targets = []
                    for ref in value.split():
                        target_id = int(ref)
                        target = feature_structures[target_id]
                        targets.append(target)
                        referenced_fs.add(target_id)
                    setattr(fs, feature_name, targets)
                else:
                    # A single reference: the value is the xmi id of the target
                    target_id = int(value)
                    target = feature_structures[target_id]
                    referenced_fs.add(target_id)
                    setattr(fs, feature_name, target)

        cas = Cas(typesystem=typesystem, lenient=lenient)
        for sofa in sofas.values():
            if sofa.sofaID == "_InitialView":
                view = cas.get_view("_InitialView")

                # We need to make sure that the sofa gets the real xmi, see #155
                view.get_sofa().xmiID = sofa.xmiID
            else:
                view = cas.create_view(sofa.sofaID,
                                       xmiID=sofa.xmiID,
                                       sofaNum=sofa.sofaNum)

            view.sofa_string = sofa.sofaString
            view.sofa_mime = sofa.mimeType

            # If a sofa has no members, then UIMA might omit the view. In that case,
            # we create an empty view for it.
            if sofa.xmiID in views:
                proto_view = views[sofa.xmiID]
            else:
                proto_view = ProtoView(sofa.xmiID)

            for member_id in proto_view.members:
                # We ignore ids of feature structures for which we do not have a type
                if member_id in lenient_ids:
                    continue

                fs = feature_structures[member_id]

                # Map from offsets in UIMA UTF-16 based offsets to Unicode codepoints
                if typesystem.is_instance_of(fs.type, "uima.tcas.Annotation"):
                    try:
                        fs.begin = sofa._offset_converter.uima_to_cassis(
                            fs.begin)
                        fs.end = sofa._offset_converter.uima_to_cassis(fs.end)
                    except KeyError:
                        # A KeyError from the conversion is ignored; an offset that
                        # was not converted keeps its original value
                        pass
                view.add_annotation(fs, keep_id=True)

        # New ids continue after the highest ones seen in the document
        cas._xmi_id_generator = IdGenerator(self._max_xmi_id + 1)
        cas._sofa_num_generator = IdGenerator(self._max_sofa_num + 1)

        return cas

コード例 #8

ファイルを表示

    def deserialize(self, source: Union[IO, str], typesystem: TypeSystem):
        """Read an XMI document from ``source`` and build a ``Cas`` from it.

        Sofas, views and annotations are collected while parsing. Afterwards
        annotation features that refer to other annotations are resolved, and
        the views of a new ``Cas`` are populated.

        Args:
            source: Passed to ``etree.iterparse``.
            typesystem: Used to look up annotation and feature range types.

        Returns:
            The populated ``Cas``.

        Raises:
            RuntimeError: On an unexpected parser event, or when the number of
                sofas differs from the number of views.
        """
        # namespaces
        NS_XMI = "{http://www.omg.org/XMI}"
        NS_CAS = "{http:///uima/cas.ecore}"

        TAG_XMI = NS_XMI + "XMI"
        TAG_CAS_NULL = NS_CAS + "NULL"
        TAG_CAS_SOFA = NS_CAS + "Sofa"
        TAG_CAS_VIEW = NS_CAS + "View"

        sofas = []
        views = {}  # keyed by the proto view's ``sofa`` attribute
        annotations = {}  # keyed by xmiID

        # Nesting depth of the element currently being processed
        depth = 0
        context = etree.iterparse(source, events=("end", "start"))
        for event, elem in context:
            # Elements are handled on their "start" event; "end" only frees memory

            if event == "start":
                depth += 1

                if elem.tag == TAG_XMI:
                    # Ignore the 'xmi:XMI' element
                    pass
                elif elem.tag == TAG_CAS_NULL:
                    pass
                elif elem.tag == TAG_CAS_SOFA:
                    sofa = self._parse_sofa(elem)
                    sofas.append(sofa)
                elif elem.tag == TAG_CAS_VIEW:
                    proto_view = self._parse_view(elem)
                    views[proto_view.sofa] = proto_view
                else:
                    # Only elements at depth 2 are parsed as annotations;
                    # more deeply nested elements are skipped
                    if depth == 2:
                        annotation = self._parse_annotation(typesystem, elem)
                        annotations[annotation.xmiID] = annotation

                    else:
                        pass

            elif event == "end":
                depth -= 1
                # Free already processed elements from memory
                self._clear_elem(elem)

            else:
                raise RuntimeError("Invalid parsing event '%s'!" % event)

        if len(sofas) != len(views):
            raise RuntimeError("Number of views and sofas is not equal!")

        # Resolve features that refer to other annotations by their xmi id
        for annotation in annotations.values():
            ann_type = typesystem.get_type(annotation.type)

            # Own features take precedence over inherited ones with the same name
            all_features = {}
            all_features.update(ann_type._inherited_features)
            all_features.update(ann_type._features)

            for attribute in annotation.__slots__:
                feature = all_features.get(attribute)

                if feature:
                    feat_type = typesystem.get_type(feature.rangeTypeName)

                    # Only ranges that are uima.tcas.Annotation or a direct subtype of it are considered
                    if feat_type.name == 'uima.tcas.Annotation' or feat_type.supertypeName == 'uima.tcas.Annotation':
                        feat_xml_id = annotation.__getattribute__(attribute)

                        if feat_xml_id:
                            feat_ann = annotations.get(int(feat_xml_id))

                            if feat_ann:

                                # Breadth-first walk starting at the range type and continuing
                                # through the ``_children`` of each visited type (presumably the
                                # names of its subtypes - TODO confirm). Despite the variable
                                # name, the walk goes towards descendants, not ancestors.
                                feat_ancestors = [feat_type.name]
                                while feat_ancestors:
                                    feat_ancestor = feat_ancestors.pop(0)

                                    if feat_ann.type == feat_ancestor:
                                        # Type matches: replace the id by the referenced annotation
                                        annotation.__setattr__(
                                            attribute, feat_ann)
                                        feat_ancestors.clear()

                                    else:
                                        feat_ancestors.extend(
                                            typesystem.get_type(
                                                feat_ancestor)._children)

        # NOTE(review): the Cas is created without passing ``typesystem``
        cas = Cas()
        for sofa in sofas:
            proto_view = views[sofa.xmiID]

            if sofa.sofaID == "_InitialView":
                view = cas.get_view("_InitialView")
            else:
                view = cas.create_view(sofa.sofaID)

            view.sofa_string = sofa.sofaString
            view.sofa_mime = sofa.mimeType

            for member_id in proto_view.members:
                annotation = annotations[member_id]

                view.add_annotation(annotation)

        return cas

コード例 #9

ファイルを表示

    def deserialize(self, source: Union[IO, str], typesystem: TypeSystem):
        """Read an XMI document from ``source`` and build a ``Cas`` from it.

        The document is parsed incrementally. Sofas, views and feature
        structures are collected first; afterwards feature values that refer
        to other feature structures are resolved, and finally the views of the
        resulting ``Cas`` are populated.

        Args:
            source: Passed to ``etree.iterparse``.
            typesystem: Used to look up types and to classify feature ranges.
                Also passed to ``Cas``.

        Returns:
            The populated ``Cas``.

        Raises:
            RuntimeError: On an unexpected parser event or state transition,
                or when the number of sofas differs from the number of views.
        """
        # namespaces
        NS_XMI = "{http://www.omg.org/XMI}"
        NS_CAS = "{http:///uima/cas.ecore}"

        TAG_XMI = NS_XMI + "XMI"
        TAG_CAS_NULL = NS_CAS + "NULL"
        TAG_CAS_SOFA = NS_CAS + "Sofa"
        TAG_CAS_VIEW = NS_CAS + "View"

        # States of the state machine described further below
        OUTSIDE_FS = 1
        INSIDE_FS = 2
        INSIDE_ARRAY = 3

        sofas = []
        views = {}  # keyed by the proto view's ``sofa`` attribute
        feature_structures = {}  # keyed by xmiID
        children = defaultdict(list)  # child-element values of the feature structure being parsed

        context = etree.iterparse(source, events=("start", "end"))

        state = OUTSIDE_FS

        for event, elem in context:
            if elem.tag == TAG_XMI or elem.tag == TAG_CAS_NULL:
                pass
                # Ignore the 'xmi:XMI' and 'cas:NULL' elements
            elif elem.tag == TAG_CAS_SOFA:
                if event == "end":
                    sofa = self._parse_sofa(elem)
                    sofas.append(sofa)
            elif elem.tag == TAG_CAS_VIEW:
                if event == "end":
                    proto_view = self._parse_view(elem)
                    views[proto_view.sofa] = proto_view
            else:
                """
                In XMI, array element features can be encoded as
                
                <cas:StringArray>
                    <elements>LNC</elements>
                    <elements>MTH</elements>
                    <elements>SNOMEDCT_US</elements>
                </cas:StringArray>
                
                In order to parse this with an incremental XML parser, we need to employ 
                a simple state machine. It is depicted in the following.
                            
                                   "start"               "start"
                     +-----------+-------->+-----------+-------->+--------+
                     | Outside   |         | Inside    |         | Inside |
                +--->+ feature   |         | feature   |         | array  |
                     | structure |         | structure |         | element|
                     +-----------+<--------+-----------+<--------+--------+
                                    "end"                 "end"                                
                """
                if event == "start":
                    if state == OUTSIDE_FS:
                        # We saw the opening tag of a new feature structure
                        state = INSIDE_FS
                    elif state == INSIDE_FS:
                        # We saw the opening tag of an array element
                        state = INSIDE_ARRAY
                    else:
                        raise RuntimeError(
                            "Invalid state transition: [{0}] 'start'".format(
                                state))
                elif event == "end":
                    if state == INSIDE_FS:
                        # We saw the closing tag of a new feature
                        state = OUTSIDE_FS
                        fs = self._parse_feature_structure(
                            typesystem, elem, children)
                        feature_structures[fs.xmiID] = fs

                        # The collected child values belong to the element just closed
                        children.clear()
                    elif state == INSIDE_ARRAY:
                        # We saw the closing tag of an array element
                        children[elem.tag].append(elem.text)
                        state = INSIDE_FS
                    else:
                        raise RuntimeError(
                            "Invalid state transition: [{0}] 'end'".format(
                                state))
                else:
                    raise RuntimeError(
                        "Invalid XML event: [{0}]".format(event))

            # Free already processed elements from memory
            if event == "end":
                self._clear_elem(elem)

        if len(sofas) != len(views):
            raise RuntimeError("Number of views and sofas is not equal!")

        # Post-process feature values
        for xmi_id, fs in feature_structures.items():
            t = typesystem.get_type(fs.type)

            for feature in t.all_features:
                feature_name = feature.name

                # The sofa feature is left untouched here
                if feature_name == "sofa":
                    continue

                if typesystem.is_primitive(
                        feature.rangeTypeName
                ) or typesystem.is_primitive_collection(feature.rangeTypeName):
                    # TODO: Parse feature values to their real type here, e.g. parse ints or floats
                    continue

                # Resolve references here
                value = getattr(fs, feature_name)
                if value is None:
                    continue

                # Resolve references
                if typesystem.is_collection(feature.rangeTypeName):
                    # A collection of references is a list of integers separated
                    # by single spaces, e.g. <foo:bar elements="1 2 3 42" />
                    targets = []
                    for ref in value.split():
                        target_id = int(ref)
                        target = feature_structures[target_id]
                        targets.append(target)
                    setattr(fs, feature_name, targets)
                else:
                    # A single reference: the value is the xmi id of the target
                    target_id = int(value)
                    target = feature_structures[target_id]
                    setattr(fs, feature_name, target)

        cas = Cas(typesystem)
        for sofa in sofas:
            proto_view = views[sofa.xmiID]

            if sofa.sofaID == "_InitialView":
                view = cas.get_view("_InitialView")
            else:
                view = cas.create_view(sofa.sofaID)

            view.sofa_string = sofa.sofaString
            view.sofa_mime = sofa.mimeType

            for member_id in proto_view.members:
                annotation = feature_structures[member_id]

                view.add_annotation(annotation)

        return cas

コード例 #10

ファイルを表示

    def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: bool, trusted: bool):
        # namespaces
        NS_XMI = "{http://www.omg.org/XMI}"
        NS_CAS = "{http:///uima/cas.ecore}"

        TAG_XMI = NS_XMI + "XMI"
        TAG_CAS_SOFA = NS_CAS + "Sofa"
        TAG_CAS_VIEW = NS_CAS + "View"

        OUTSIDE_FS = 1
        INSIDE_FS = 2
        INSIDE_ARRAY = 3

        sofas = {}
        views = {}
        feature_structures = {}
        children = defaultdict(list)
        lenient_ids = set()

        context = etree.iterparse(source, events=("start", "end"), huge_tree=trusted)

        state = OUTSIDE_FS
        self._max_xmi_id = 0
        self._max_sofa_num = 0

        for event, elem in context:
            # Ignore the 'xmi:XMI'
            if elem.tag == TAG_XMI:
                pass
            elif elem.tag == TAG_CAS_SOFA:
                if event == "end":
                    sofa = self._parse_sofa(typesystem, elem)
                    sofas[sofa.xmiID] = sofa
            elif elem.tag == TAG_CAS_VIEW:
                if event == "end":
                    proto_view = self._parse_view(elem)
                    views[proto_view.sofa] = proto_view
            else:
                """
                In XMI, array element features can be encoded as

                <cas:StringArray>
                    <elements>LNC</elements>
                    <elements>MTH</elements>
                    <elements>SNOMEDCT_US</elements>
                </cas:StringArray>

                In order to parse this with an incremental XML parser, we need to employ
                a simple state machine. It is depicted in the following.

                                   "start"               "start"
                     +-----------+-------->+-----------+-------->+--------+
                     | Outside   |         | Inside    |         | Inside |
                +--->+ feature   |         | feature   |         | array  |
                     | structure |         | structure |         | element|
                     +-----------+<--------+-----------+<--------+--------+
                                    "end"                 "end"
                """
                if event == "start":
                    if state == OUTSIDE_FS:
                        # We saw the opening tag of a new feature structure
                        state = INSIDE_FS
                    elif state == INSIDE_FS:
                        # We saw the opening tag of an array element
                        state = INSIDE_ARRAY
                    else:
                        raise RuntimeError(f"Invalid state transition: [{state}] 'start'")
                elif event == "end":
                    if state == INSIDE_FS:
                        # We saw the closing tag of a new feature
                        state = OUTSIDE_FS

                        # If a type was not found, ignore it if lenient, else raise an exception
                        try:
                            fs = self._parse_feature_structure(typesystem, elem, children)
                            feature_structures[fs.xmiID] = fs
                        except TypeNotFoundError as e:
                            if not lenient:
                                raise e

                            warnings.warn(e.message)
                            xmiID = elem.attrib.get("{http://www.omg.org/XMI}id", None)
                            if xmiID:
                                lenient_ids.add(int(xmiID))

                        children.clear()
                    elif state == INSIDE_ARRAY:
                        # We saw the closing tag of an array element
                        children[elem.tag].append(elem.text)
                        state = INSIDE_FS
                    else:
                        raise RuntimeError(f"Invalid state transition: [{state}] 'end'")
                else:
                    raise RuntimeError(f"Invalid XML event: [{event}]")

            # Free already processed elements from memory
            if event == "end":
                self._clear_elem(elem)

        # Post-process feature values
        for xmi_id, fs in feature_structures.items():
            t = typesystem.get_type(fs.type.name)

            for feature in t.all_features:
                feature_name = feature.name
                value = fs[feature_name]

                if feature_name == "sofa":
                    fs[feature_name] = sofas[value]
                    continue

                if typesystem.is_instance_of(fs.type.name, TYPE_NAME_STRING_ARRAY):
                    # We already parsed string arrays to a Python list of string
                    # before, so we do not need to work more on this
                    continue
                elif typesystem.is_primitive(feature.rangeType):
                    fs[feature_name] = self._parse_primitive_value(feature.rangeType, value)
                    continue
                elif typesystem.is_primitive_array(fs.type) and feature_name == "elements":
                    # Separately rendered arrays (typically used with multipleReferencesAllowed = True)
                    fs[feature_name] = self._parse_primitive_array(fs.type, value)
                elif typesystem.is_primitive_array(feature.rangeType) and not feature.multipleReferencesAllowed:
                    # Array feature rendered inline (multipleReferencesAllowed = False|None)
                    # We also end up here for array features that were rendered as child elements. No need to parse
                    # them again, so we check if the value is still a string (i.e. attribute value) and only then
                    # process it
                    if isinstance(value, str):
                        FSType = feature.rangeType
                        fs[feature_name] = FSType(elements=self._parse_primitive_array(feature.rangeType, value))
                elif typesystem.is_primitive_list(feature.rangeType) and not feature.multipleReferencesAllowed:
                    # Array feature rendered inline (multipleReferencesAllowed = False|None)
                    # We also end up here for array features that were rendered as child elements. No need to parse
                    # them again, so we check if the value is still a string (i.e. attribute value) and only then
                    # process it
                    if isinstance(value, str):
                        fs[feature_name] = self._parse_primitive_list(feature.rangeType, value)
                else:
                    # Resolve references here
                    if value is None:
                        continue

                    # Resolve references
                    if fs.type.name == TYPE_NAME_FS_ARRAY or (
                        feature.rangeType.name == TYPE_NAME_FS_ARRAY and not feature.multipleReferencesAllowed
                    ):
                        # An array of references is a list of integers separated
                        # by single spaces, e.g. <foo:bar elements="1 2 3 42" />
                        targets = []
                        for ref in value.split():
                            target_id = int(ref)
                            target = feature_structures[target_id]
                            targets.append(target)

                        if feature.rangeType.name == TYPE_NAME_FS_ARRAY:
                            # Wrap inline array into the appropriate array object
                            ArrayType = typesystem.get_type(TYPE_NAME_FS_ARRAY)
                            targets = ArrayType(elements=targets)

                        fs[feature_name] = targets
                    elif feature.rangeType.name == TYPE_NAME_FS_LIST and not feature.multipleReferencesAllowed:
                        # Array feature rendered inline (multipleReferencesAllowed = False|None)
                        # We also end up here for array features that were rendered as child elements. No need to parse
                        # them again, so we check if the value is still a string (i.e. attribute value) and only then
                        # process it
                        if isinstance(value, list) or isinstance(value, str):
                            fs[feature_name] = self._parse_fs_list(feature_structures, feature.rangeType, value)
                    else:
                        target_id = int(value)
                        fs[feature_name] = feature_structures[target_id]

        cas = Cas(typesystem=typesystem, lenient=lenient)
        for sofa in sofas.values():
            if sofa.sofaID == "_InitialView":
                view = cas.get_view("_InitialView")

                # We need to make sure that the sofa gets the real xmi, see #155
                view.get_sofa().xmiID = sofa.xmiID
            else:
                view = cas.create_view(sofa.sofaID, xmiID=sofa.xmiID, sofaNum=sofa.sofaNum)

            view.sofa_string = sofa.sofaString
            view.sofa_mime = sofa.mimeType

            # If a sofa has no members, then UIMA might omit the view. In that case,
            # we create an empty view for it.
            if sofa.xmiID in views:
                proto_view = views[sofa.xmiID]
            else:
                proto_view = ProtoView(sofa.xmiID)

            for member_id in proto_view.members:
                # We ignore ids of feature structures for which we do not have a type
                if member_id in lenient_ids:
                    continue

                fs = feature_structures[member_id]

                # Map from offsets in UIMA UTF-16 based offsets to Unicode codepoints
                if typesystem.is_instance_of(fs.type.name, "uima.tcas.Annotation"):
                    fs.begin = sofa._offset_converter.external_to_python(fs.begin)
                    fs.end = sofa._offset_converter.external_to_python(fs.end)

                view.add(fs, keep_id=True)

        cas._xmi_id_generator = IdGenerator(self._max_xmi_id + 1)
        cas._sofa_num_generator = IdGenerator(self._max_sofa_num + 1)

        return cas

コード例 #11

ファイルを表示

ファイル: xmi.py プロジェクト: Pangeamt/dkpro-cassis

    def deserialize(self, source: Union[IO, str], typesystem: TypeSystem):
        """Build a ``Cas`` from an XMI document.

        The document is read incrementally with ``etree.iterparse``. Sofas,
        views and feature structures are collected first; afterwards the
        reference features are resolved to the feature structures they point
        to, and the members of every view are added to that view.

        Args:
            source: The XMI input, handed to ``etree.iterparse`` unchanged.
            typesystem: Type system used to look up the type of every parsed
                feature structure.

        Returns:
            The ``Cas`` holding all views and their members.

        Raises:
            RuntimeError: If the parser events do not follow the expected
                nesting (feature structure -> array element) or an unknown
                event kind is reported.
        """
        # Imported locally so this method does not rely on a module-level import.
        import warnings

        # Namespaces
        NS_XMI = "{http://www.omg.org/XMI}"
        NS_CAS = "{http:///uima/cas.ecore}"

        TAG_XMI = NS_XMI + "XMI"
        TAG_CAS_SOFA = NS_CAS + "Sofa"
        TAG_CAS_VIEW = NS_CAS + "View"

        # States of the parser state machine described further below
        OUTSIDE_FS = 1
        INSIDE_FS = 2
        INSIDE_ARRAY = 3

        sofas = {}
        views = {}
        feature_structures = {}
        # Child element texts of the feature structure currently being read,
        # grouped by child tag
        children = defaultdict(list)

        context = etree.iterparse(source, events=("start", "end"))

        state = OUTSIDE_FS
        self._max_xmi_id = 0
        self._max_sofa_num = 0

        for event, elem in context:
            # Ignore the 'xmi:XMI'
            if elem.tag == TAG_XMI:
                pass
            elif elem.tag == TAG_CAS_SOFA:
                if event == "end":
                    sofa = self._parse_sofa(elem)
                    sofas[sofa.xmiID] = sofa
            elif elem.tag == TAG_CAS_VIEW:
                if event == "end":
                    proto_view = self._parse_view(elem)
                    views[proto_view.sofa] = proto_view
            else:
                # In XMI, array element features can be encoded as
                #
                # <cas:StringArray>
                #     <elements>LNC</elements>
                #     <elements>MTH</elements>
                #     <elements>SNOMEDCT_US</elements>
                # </cas:StringArray>
                #
                # In order to parse this with an incremental XML parser, we
                # need to employ a simple state machine. It is depicted in the
                # following.
                #
                #                    "start"               "start"
                #      +-----------+-------->+-----------+-------->+--------+
                #      | Outside   |         | Inside    |         | Inside |
                # +--->+ feature   |         | feature   |         | array  |
                #      | structure |         | structure |         | element|
                #      +-----------+<--------+-----------+<--------+--------+
                #                     "end"                 "end"
                if event == "start":
                    if state == OUTSIDE_FS:
                        # We saw the opening tag of a new feature structure
                        state = INSIDE_FS
                    elif state == INSIDE_FS:
                        # We saw the opening tag of an array element
                        state = INSIDE_ARRAY
                    else:
                        raise RuntimeError(
                            "Invalid state transition: [{0}] 'start'".format(
                                state))
                elif event == "end":
                    if state == INSIDE_FS:
                        # We saw the closing tag of a feature structure
                        state = OUTSIDE_FS
                        fs = self._parse_feature_structure(
                            typesystem, elem, children)
                        feature_structures[fs.xmiID] = fs

                        # The collected children belong to this feature
                        # structure only
                        children.clear()
                    elif state == INSIDE_ARRAY:
                        # We saw the closing tag of an array element
                        children[elem.tag].append(elem.text)
                        state = INSIDE_FS
                    else:
                        raise RuntimeError(
                            "Invalid state transition: [{0}] 'end'".format(
                                state))
                else:
                    raise RuntimeError(
                        "Invalid XML event: [{0}]".format(event))

            # Free already processed elements from memory
            if event == "end":
                self._clear_elem(elem)

        # Post-process feature values: up to here, references are still the
        # raw xmi ids read from the document
        for fs in feature_structures.values():
            t = typesystem.get_type(fs.type)

            for feature in t.all_features:
                feature_name = feature.name

                if feature_name == "sofa":
                    # Replace the sofa id by the parsed sofa itself
                    value = getattr(fs, feature_name)
                    sofa = sofas[value]
                    setattr(fs, feature_name, sofa)
                    continue

                if (typesystem.is_primitive(feature.rangeTypeName)
                        or typesystem.is_primitive_collection(
                            feature.rangeTypeName)
                        or typesystem.is_primitive_collection(fs.type)):
                    # TODO: Parse feature values to their real type here, e.g. parse ints or floats
                    continue

                # Everything else is a reference that has to be resolved
                value = getattr(fs, feature_name)
                if value is None:
                    continue

                if typesystem.is_collection(fs.type, feature):
                    # A collection of references is a list of integers separated
                    # by single spaces, e.g. <foo:bar elements="1 2 3 42" />
                    targets = [feature_structures[int(ref)]
                               for ref in value.split()]
                    setattr(fs, feature_name, targets)
                else:
                    setattr(fs, feature_name, feature_structures[int(value)])

        cas = Cas(typesystem=typesystem)
        for sofa in sofas.values():
            if sofa.sofaID == "_InitialView":
                view = cas.get_view("_InitialView")
            else:
                view = cas.create_view(sofa.sofaID,
                                       xmiID=sofa.xmiID,
                                       sofaNum=sofa.sofaNum)

            view.sofa_string = sofa.sofaString
            view.sofa_mime = sofa.mimeType

            # If a sofa has no members, then UIMA might omit the view. In that case,
            # we create an empty view for it.
            if sofa.xmiID in views:
                proto_view = views[sofa.xmiID]
            else:
                proto_view = ProtoView(sofa.xmiID)

            # Patch: Rewrite offsets. Without a sofa string there is nothing
            # to map against, so offsets are then left as they are.
            if view.sofa_string is not None:
                mapper = JavaOffsetsMapper(view.sofa_string)
            else:
                mapper = None
            # End patch

            for member_id in proto_view.members:
                annotation = feature_structures[member_id]

                # Patch: Rewrite offsets
                if mapper and \
                        hasattr(annotation, "begin") and\
                        hasattr(annotation, "end") and\
                        annotation.begin is not None and\
                        annotation.end is not None:

                    annotation.begin = mapper.java_to_python_begin(
                        annotation.begin)
                    try:
                        annotation.end = mapper.java_to_python_end(
                            annotation.end)
                    except Exception as exc:
                        # Best effort: the end offset is kept unchanged when it
                        # cannot be mapped. The exception types raised by the
                        # mapper are not visible here, hence the broad clause;
                        # the failure is reported instead of being dropped.
                        warnings.warn(
                            "Could not convert end offset [{0}] of feature "
                            "structure with xmi:id [{1}] in view [{2}], "
                            "leaving it unchanged: {3!r}".format(
                                annotation.end, annotation.xmiID,
                                sofa.sofaID, exc))

                # End patch

                view.add_annotation(annotation, keep_id=True)

        # Continue numbering after the highest ids seen while parsing
        cas._xmi_id_generator = IdGenerator(self._max_xmi_id + 1)
        cas._sofa_num_generator = IdGenerator(self._max_sofa_num + 1)

        return cas