Python TypeSystem.is_primitive_arrayの例

プログラミング言語: Python

名前空間/パッケージ名: cassis.typesystem

クラス/型: TypeSystem

メソッド/関数: is_primitive_array

hotexamples.comのコード掲載数: 2

Python TypeSystem.is_primitive_array - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのcassis.typesystem.TypeSystem.is_primitive_arrayの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

get_type(11)

TypeSystem(5)

is_primitive(4)

is_collection(3)

is_primitive_collection(3)

is_instance_of(2)

is_primitive_array(2)

is_primitive_list(2)

コード例 #1

ファイルを表示

    def _parse_feature_structure(self, typesystem: TypeSystem, elem, children: Dict[str, List[str]]):
        """Turn one XMI element into an instance of its type from ``typesystem``.

        The type name is derived from the element tag. The XML attributes,
        overlaid with the collected child element values, become the keyword
        arguments of the type's constructor. ``self._max_xmi_id`` is raised to
        the element's xmi id when that id is larger.

        Args:
            typesystem: Used to look up the type and to classify feature ranges.
            elem: Element providing ``tag`` and ``attrib``.
            children: Values collected from nested child elements, by name.

        Returns:
            Whatever calling the looked-up type with the prepared arguments
            produces.
        """
        # TODO: Error checking
        # Build the type name from the tag: skip the first nine characters (the
        # http prefix), turn "/" into "." and remove the "ecore}" part.
        tag_without_prefix = elem.tag[9:]
        dotted_name = tag_without_prefix.replace("/", ".")
        type_name: str = dotted_name.replace("ecore}", "").strip()

        no_namespace_prefix = "uima.noNamespace."
        if type_name.startswith(no_namespace_prefix):
            type_name = type_name[len(no_namespace_prefix):]

        fs_type = typesystem.get_type(type_name)

        # XML attributes first, values of child elements layered on top
        kwargs = dict(elem.attrib)
        kwargs.update(children)

        # Map the xmi:id attribute to xmiID
        kwargs["xmiID"] = int(kwargs.pop("{http://www.omg.org/XMI}id"))

        # These arrive as strings but are used as integers
        for int_key in ("begin", "end", "sofa"):
            if int_key in kwargs:
                kwargs[int_key] = int(kwargs[int_key])

        # Remap features that use a reserved Python name
        for reserved, replacement in (("self", "self_"), ("type", "type_")):
            if reserved in kwargs:
                kwargs[replacement] = kwargs.pop(reserved)

        # Values that came from nested elements are still plain Python lists at
        # this point; wrap them into the array/list type their feature declares.
        if not typesystem.is_primitive_array(type_name):
            for feature_name in children:
                range_type = fs_type.get_feature(feature_name).rangeType
                if typesystem.is_primitive_array(range_type):
                    kwargs[feature_name] = range_type(elements=kwargs[feature_name])
                if typesystem.is_primitive_list(range_type):
                    kwargs[feature_name] = self._parse_primitive_list(range_type, kwargs[feature_name])

        self._max_xmi_id = max(kwargs["xmiID"], self._max_xmi_id)
        return fs_type(**kwargs)

コード例 #2

ファイルを表示

    def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: bool, trusted: bool):
        """Build a ``Cas`` from an XMI document that is parsed incrementally.

        The method works in three passes:

        1. Stream the XML with ``etree.iterparse`` and collect sofas, views and
           feature structures (a small state machine tracks element nesting).
        2. Post-process the feature values of every collected feature
           structure: convert primitives, wrap arrays/lists and replace xmi id
           references with the referenced feature structures.
        3. Create the ``Cas``, set up one view per sofa and add the view
           members to it.

        Args:
            source: Passed unchanged to ``etree.iterparse``.
            typesystem: Used to look up types and to classify feature ranges.
            lenient: When true, a feature structure whose type raises
                ``TypeNotFoundError`` is skipped with a warning instead of
                aborting; the flag is also forwarded to ``Cas``.
            trusted: Forwarded to ``etree.iterparse`` as ``huge_tree``.

        Returns:
            The populated ``Cas``.

        Raises:
            TypeNotFoundError: If a type is not found and ``lenient`` is false.
            RuntimeError: On an unexpected parser event, or on element nesting
                the state machine does not allow.
        """
        # namespaces
        NS_XMI = "{http://www.omg.org/XMI}"
        NS_CAS = "{http:///uima/cas.ecore}"

        # Tags are compared in their "{namespace}localname" form
        TAG_XMI = NS_XMI + "XMI"
        TAG_CAS_SOFA = NS_CAS + "Sofa"
        TAG_CAS_VIEW = NS_CAS + "View"

        # States of the state machine described further below
        OUTSIDE_FS = 1
        INSIDE_FS = 2
        INSIDE_ARRAY = 3

        sofas = {}  # xmi id -> parsed sofa
        views = {}  # value of proto_view.sofa -> proto view
        feature_structures = {}  # xmi id -> parsed feature structure
        children = defaultdict(list)  # child tag -> texts, for the feature structure currently being read
        lenient_ids = set()  # xmi ids of feature structures skipped in lenient mode

        context = etree.iterparse(source, events=("start", "end"), huge_tree=trusted)

        state = OUTSIDE_FS
        self._max_xmi_id = 0
        # NOTE(review): not modified again in this method; presumably a helper such as _parse_sofa updates it - verify
        self._max_sofa_num = 0

        for event, elem in context:
            # Ignore the 'xmi:XMI'
            if elem.tag == TAG_XMI:
                pass
            elif elem.tag == TAG_CAS_SOFA:
                # Only handled on "end"; the "start" event of the element is ignored
                if event == "end":
                    sofa = self._parse_sofa(typesystem, elem)
                    sofas[sofa.xmiID] = sofa
            elif elem.tag == TAG_CAS_VIEW:
                if event == "end":
                    proto_view = self._parse_view(elem)
                    views[proto_view.sofa] = proto_view
            else:
                """
                In XMI, array element features can be encoded as

                <cas:StringArray>
                    <elements>LNC</elements>
                    <elements>MTH</elements>
                    <elements>SNOMEDCT_US</elements>
                </cas:StringArray>

                In order to parse this with an incremental XML parser, we need to employ
                a simple state machine. It is depicted in the following.

                                   "start"               "start"
                     +-----------+-------->+-----------+-------->+--------+
                     | Outside   |         | Inside    |         | Inside |
                +--->+ feature   |         | feature   |         | array  |
                     | structure |         | structure |         | element|
                     +-----------+<--------+-----------+<--------+--------+
                                    "end"                 "end"
                """
                if event == "start":
                    if state == OUTSIDE_FS:
                        # We saw the opening tag of a new feature structure
                        state = INSIDE_FS
                    elif state == INSIDE_FS:
                        # We saw the opening tag of an array element
                        state = INSIDE_ARRAY
                    else:
                        # An element opened inside an array element: deeper nesting is not handled
                        raise RuntimeError(f"Invalid state transition: [{state}] 'start'")
                elif event == "end":
                    if state == INSIDE_FS:
                        # We saw the closing tag of a feature structure
                        state = OUTSIDE_FS

                        # If a type was not found, ignore it if lenient, else raise an exception
                        try:
                            fs = self._parse_feature_structure(typesystem, elem, children)
                            feature_structures[fs.xmiID] = fs
                        except TypeNotFoundError as e:
                            if not lenient:
                                raise e

                            warnings.warn(e.message)
                            # Remember the id so that view members pointing to it can be skipped later
                            xmiID = elem.attrib.get("{http://www.omg.org/XMI}id", None)
                            if xmiID:
                                lenient_ids.add(int(xmiID))

                        # The collected child values belong to this feature structure only
                        children.clear()
                    elif state == INSIDE_ARRAY:
                        # We saw the closing tag of an array element
                        children[elem.tag].append(elem.text)
                        state = INSIDE_FS
                    else:
                        raise RuntimeError(f"Invalid state transition: [{state}] 'end'")
                else:
                    raise RuntimeError(f"Invalid XML event: [{event}]")

            # Free already processed elements from memory
            if event == "end":
                self._clear_elem(elem)

        # Post-process feature values. This happens only after the whole document has been read, because
        # references are resolved by looking up their target in feature_structures.
        for xmi_id, fs in feature_structures.items():
            t = typesystem.get_type(fs.type.name)

            for feature in t.all_features:
                feature_name = feature.name
                value = fs[feature_name]

                if feature_name == "sofa":
                    # Replace the sofa id with the parsed sofa it refers to
                    fs[feature_name] = sofas[value]
                    continue

                if typesystem.is_instance_of(fs.type.name, TYPE_NAME_STRING_ARRAY):
                    # We already parsed string arrays to a Python list of string
                    # before, so we do not need to work more on this
                    continue
                elif typesystem.is_primitive(feature.rangeType):
                    fs[feature_name] = self._parse_primitive_value(feature.rangeType, value)
                    continue
                elif typesystem.is_primitive_array(fs.type) and feature_name == "elements":
                    # Separately rendered arrays (typically used with multipleReferencesAllowed = True)
                    fs[feature_name] = self._parse_primitive_array(fs.type, value)
                elif typesystem.is_primitive_array(feature.rangeType) and not feature.multipleReferencesAllowed:
                    # Array feature rendered inline (multipleReferencesAllowed = False|None)
                    # We also end up here for array features that were rendered as child elements. No need to parse
                    # them again, so we check if the value is still a string (i.e. attribute value) and only then
                    # process it
                    if isinstance(value, str):
                        FSType = feature.rangeType
                        fs[feature_name] = FSType(elements=self._parse_primitive_array(feature.rangeType, value))
                elif typesystem.is_primitive_list(feature.rangeType) and not feature.multipleReferencesAllowed:
                    # List feature rendered inline (multipleReferencesAllowed = False|None)
                    # We also end up here for list features that were rendered as child elements. No need to parse
                    # them again, so we check if the value is still a string (i.e. attribute value) and only then
                    # process it
                    if isinstance(value, str):
                        fs[feature_name] = self._parse_primitive_list(feature.rangeType, value)
                else:
                    # Everything that is left is treated as a reference; nothing to resolve if it is unset
                    if value is None:
                        continue

                    # Resolve references
                    if fs.type.name == TYPE_NAME_FS_ARRAY or (
                        feature.rangeType.name == TYPE_NAME_FS_ARRAY and not feature.multipleReferencesAllowed
                    ):
                        # An array of references is a list of integers separated
                        # by single spaces, e.g. <foo:bar elements="1 2 3 42" />
                        targets = []
                        for ref in value.split():
                            target_id = int(ref)
                            target = feature_structures[target_id]
                            targets.append(target)

                        if feature.rangeType.name == TYPE_NAME_FS_ARRAY:
                            # Wrap inline array into the appropriate array object
                            ArrayType = typesystem.get_type(TYPE_NAME_FS_ARRAY)
                            targets = ArrayType(elements=targets)

                        fs[feature_name] = targets
                    elif feature.rangeType.name == TYPE_NAME_FS_LIST and not feature.multipleReferencesAllowed:
                        # List feature rendered inline (multipleReferencesAllowed = False|None)
                        # The value is handed to _parse_fs_list when it is a Python list or a string; any other
                        # value is left untouched
                        if isinstance(value, list) or isinstance(value, str):
                            fs[feature_name] = self._parse_fs_list(feature_structures, feature.rangeType, value)
                    else:
                        # A single reference: the value is the xmi id of the target
                        target_id = int(value)
                        fs[feature_name] = feature_structures[target_id]

        # Build the CAS: one view per sofa, then add the members of each view
        cas = Cas(typesystem=typesystem, lenient=lenient)
        for sofa in sofas.values():
            if sofa.sofaID == "_InitialView":
                view = cas.get_view("_InitialView")

                # We need to make sure that the sofa gets the real xmi, see #155
                view.get_sofa().xmiID = sofa.xmiID
            else:
                view = cas.create_view(sofa.sofaID, xmiID=sofa.xmiID, sofaNum=sofa.sofaNum)

            view.sofa_string = sofa.sofaString
            view.sofa_mime = sofa.mimeType

            # If a sofa has no members, then UIMA might omit the view. In that case,
            # we create an empty view for it.
            if sofa.xmiID in views:
                proto_view = views[sofa.xmiID]
            else:
                proto_view = ProtoView(sofa.xmiID)

            for member_id in proto_view.members:
                # We ignore ids of feature structures for which we do not have a type
                if member_id in lenient_ids:
                    continue

                fs = feature_structures[member_id]

                # Map from offsets in UIMA UTF-16 based offsets to Unicode codepoints
                if typesystem.is_instance_of(fs.type.name, "uima.tcas.Annotation"):
                    fs.begin = sofa._offset_converter.external_to_python(fs.begin)
                    fs.end = sofa._offset_converter.external_to_python(fs.end)

                view.add(fs, keep_id=True)

        # Start the id generators one past the highest values recorded during parsing
        cas._xmi_id_generator = IdGenerator(self._max_xmi_id + 1)
        cas._sofa_num_generator = IdGenerator(self._max_sofa_num + 1)

        return cas