Exemple #1
0
 def _get_stop(self):
     """Resolve this element's stop reference and record it in the ZODB store.

     The stop reference is read from ``self._elements``.  If the
     ``stops_without_line`` mapping of the database root has no entry for
     it, one is created from the stop name.  If the stored entry reports no
     location, latitude and longitude are read and stored on it.  The
     reference itself ends up in ``self._stop``.
     """
     # TODO: use complex xpath
     def pick(path):
         # Every lookup runs against the same elements and namespaces.
         return select(self._elements, path, namespaces=self._namespaces)

     ref = StopWithoutLineStr(pick(XPaths.stop_name_ref))
     # NOTE(review): neither the database nor the connection is closed in
     # this method -- confirm who owns their lifetime.
     database: ZODB.DB = ZODB.config.databaseFromString(zodb_conf)
     conn: Connection = database.open()
     registry: DBStopWithoutLine = conn.root().stops_without_line
     if ref not in registry.keys():
         registry[ref] = StopWithoutLine(ref, pick(XPaths.stop_name_name))
     if not registry[ref].has_location():
         latitude = pick(XPaths.stop_name_lat)
         longitude = pick(XPaths.stop_name_lon)
         registry[ref].set_location({
             'latitude': latitude,
             'longitude': longitude,
         })
     self._stop = ref
Exemple #2
0
    def parseKML(self):
        """Parse the XML content of a MOSMIX .kml file into ``self.DataTable``.

        Reads the issue time, the forecast time steps and every
        ``dwd:Forecast`` value series from ``self._kml``.  The series are
        stored as a pandas DataFrame (one column per ``dwd:elementName``)
        indexed by the time steps under the index name ``PeriodEnd``.

        Returns:
            True when the table was built, False when ``self._kml`` is None.
            Any error during parsing is printed and terminates the process
            with exit status 1.
        """
        success = False
        if self._kml is not None:
            try:
                issue_time = elementpath.select(self._kml, '//dwd:IssueTime/text()', self._kmlNS)[0]
                issue_time = re.sub('T', ' ', issue_time)
                # The dot is escaped so only a literal ".000Z" suffix is rewritten;
                # the result matches the format pandas prints for the time steps.
                self.IssueTime = re.sub(r'\.000Z', '+00:00', issue_time)
                PeriodEnd = elementpath.select(self._kml, '//dwd:ForecastTimeSteps/dwd:TimeStep/text()', self._kmlNS)
                ParaNames = elementpath.select(self._kml, '//dwd:Forecast/@dwd:elementName', self._kmlNS)
                valStrArray = elementpath.select(self._kml, '//dwd:Forecast/dwd:value', self._kmlNS)
                weatherData = {}
                if len(ParaNames) != len(valStrArray):
                    raise Exception("ERROR --- length mismatch in parseKML()")
                for param, value_node in zip(ParaNames, valStrArray):
                    # Only a stand-alone "-" token becomes NaN.  Replacing every
                    # "-" character would turn a negative number such as "-1.5"
                    # into the unparsable "nan1.5".
                    tokens = value_node.text.split()
                    weatherData[param] = np.array(
                        [np.nan if token == '-' else float(token) for token in tokens],
                        dtype=float)
                self.DataTable = pd.DataFrame(weatherData, index=pd.DatetimeIndex(PeriodEnd))
                self.DataTable.index.name = 'PeriodEnd'  # Time is in UTC
                success = True

            except Exception as e:
                print("parseKLM: " + str(e))
                sys.exit(1)

        return success
Exemple #3
0
def concat_not_working_workaround(xpath: str, tree: ElementTree.ElementTree,
                                  **kwargs):
    """Emulate a trailing XPath ``concat(...)`` step by concatenating in Python.

    ``xpath`` must have the form ``<origin>/concat(<arg>, <arg>, ...)``.  The
    origin is selected on ``tree``.  For every selected region each argument
    is either evaluated relative to that region or, when it contains a single
    quote, taken literally with the quotes removed.  Arguments that yield
    several values are combined position by position; single values are
    repeated on every row.

    :param xpath: expression containing exactly one ``/concat(`` part
    :param tree: tree the origin expression is evaluated on
    :param kwargs: passed through unchanged to every ``select`` call
    :return: flat list with the concatenated strings of all regions
    """
    origin, raw_arguments = xpath.split('/concat(')
    # Drop the closing parenthesis before splitting the argument list.
    arguments: List[str] = raw_arguments[:-1].split(', ')
    output: List[str] = []
    for region in select(tree, origin, **kwargs):
        columns: List[List[str]] = []
        for argument in arguments:
            if "'" in argument:
                # Quoted literal: use its text as a one-element column.
                columns.append([argument.replace("'", '')])
                continue
            selected = select(region, argument, **kwargs)
            columns.append(
                selected if isinstance(selected, list) else [selected])
        row_count = max(len(column) for column in columns)
        for row in range(row_count):
            output.append(''.join(
                column[row] if len(column) > 1 else column[0]
                for column in columns
                if column))
    return output
Exemple #4
0
 def get_document_id(self):
     """Return the ``@extension`` of the document's ``id`` element.

     The known ``@root`` values are tried in order and the first one that
     yields a match wins.  Raises IndexError when none of them matches.
     """
     # TODO: define all possible Document ID xPaths
     candidates = (
         """/*/id[@root="1.2.40.0.34.99.4613.3.1"]/@extension""",
         """/*/id[@root="1.2.40.0.10.1.4.3.4.2.1"]/@extension""",
     )
     matches = []
     for candidate in candidates:
         matches = elementpath.select(self.root, candidate, self.namespaces)
         if len(matches) != 0:
             break
     return matches[0]
Exemple #5
0
    def to_dict(self, xpath=None, namespaces=None, **kwargs):
        """
        Convert xml nodes into a dict
        Note: attribute values in the xml are not handled by this function

        @param {string} xpath=None - XPath search path, None means the root node
        @param {dict} namespaces=None - namespaces
        @param {**kwargs} kwargs - extended parameters, including:
            item_dict_xpaths = None - {dict} - xPaths of the nodes whose list/tuple items
                should be represented as dicts
                key - xPath of the node, value - namespaces for the search (None or dict)
                Note: the xPath addresses the list node (not the list item nodes) and is
                searched starting from the root node

        @return {dict} - the converted dict
            Note: the node itself is included, e.g. <data><a>val1</a><b>val2</b></data>
            has to be accessed starting from dict['data']
        """
        # Build the item_dict_nodes argument
        item_dict_nodes = None
        item_xpaths = kwargs.get('item_dict_xpaths')
        if item_xpaths is not None:
            item_dict_nodes = []
            for item_xpath in item_xpaths.keys():
                item_namespaces = item_xpaths[item_xpath]
                if self.use_xpath2:
                    # xpath2.0
                    found = elementpath.select(self.root,
                                               item_xpath,
                                               namespaces=item_namespaces)
                else:
                    # xpath1.0
                    found = self.root.xpath(item_xpath,
                                            namespaces=item_namespaces)
                # Merge the lists
                item_dict_nodes = item_dict_nodes + found

        # Collect the nodes to convert
        if xpath is None:
            roots = [self.root]
        elif self.use_xpath2:
            # xpath2.0
            roots = elementpath.select(self.root, xpath, namespaces=namespaces)
        else:
            # xpath1.0
            roots = self.root.xpath(xpath, namespaces=namespaces)

        # Build the dict; a later node with the same key replaces an earlier one
        converted = {}
        for node in roots:
            if node is None:
                continue
            key, value = self._xml_node_to_dict_value(
                node, item_dict_nodes=item_dict_nodes)
            if key is not None:
                converted[key] = value
        return converted
    def test_select_function(self):
        """Check select() on a text() step and on a variable reference."""
        text_nodes = select(self.root, 'text()')
        self.assertListEqual(text_nodes, ['Dickens'])

        untyped = select(self.root, '$a', variables={'a': 1})
        self.assertEqual(untyped, 1)

        # Same variable reference, this time with a declared variable type.
        typed = select(self.root,
                       '$a',
                       variables={'a': 1},
                       variable_types={'a': 'xs:decimal'})
        self.assertEqual(typed, 1)
Exemple #7
0
 def get_patient_name(self):
     """Return the patient's given names and family name.

     The result is a dict holding the raw selection results: ``'vornamen'``
     for the ``given`` text nodes and ``'nachname'`` for the text of
     ``family`` elements without a ``qualifier`` attribute.
     """
     given_names = elementpath.select(
         self.root,
         """//recordTarget/patientRole/patient/name/given/text()""",
         self.namespaces)
     family_names = elementpath.select(
         self.root,
         """//recordTarget/patientRole/patient/name/family[not(@qualifier)]/text()""",
         self.namespaces)
     return {'vornamen': given_names, 'nachname': family_names}
Exemple #8
0
 def _get_cords(self):
     """Read latitude and longitude into ``self._lat`` and ``self._lon``.

     Each coordinate is the float of the first selected value, or None when
     the selection is empty.
     """
     # TODO: Use complex Xpaths
     latitudes = select(self._elements,
                        XPaths.location_lat,
                        namespaces=self._namespaces)
     if latitudes:
         self._lat = float(latitudes[0])
     else:
         self._lat = None

     longitudes = select(self._elements,
                         XPaths.location_lon,
                         namespaces=self._namespaces)
     if longitudes:
         self._lon = float(longitudes[0])
     else:
         self._lon = None
Exemple #9
0
 def extract(self) -> dict:
     """
     Extracts data from the parsed tree using the configured expressions.

     The result is cached in ``self.extracted``; a non-empty cache is
     returned as is.
     :return: JSON formed dict of extracted data
     """
     if self.extracted:  # extraction was already performed
         return self.extracted

     # values that appear only once
     if self.single:
         for name, spec in self.single.items():
             if not isinstance(spec, list):
                 self.extracted[name] = self.joiner(
                     elementpath.select(self.tree, spec))
             elif spec[1] == self.FORCE_LXML:
                 self.extracted[name] = self.joiner(
                     self.tree.xpath(spec[0]))
             else:
                 self.extracted[name] = self.joiner(
                     elementpath.select(self.tree, spec[0]), spec[1])

     # values that appear multiple times
     if self.multiple:
         hits_by_name = {}
         item_count = 0
         # get the results for each data item
         for name, spec in self.multiple.items():
             if not isinstance(spec, list):
                 hits = elementpath.select(self.tree, spec)
                 # only plain expressions decide how many items are built
                 item_count = len(hits)
             elif spec[1] == self.FORCE_LXML:
                 hits = self.tree.xpath(spec[0])
             else:
                 hits = (elementpath.select(self.tree, spec[0]), spec[1])
             hits_by_name[name] = hits

         # build one dict per position 0...item_count-1
         items = []
         for position in range(item_count):
             item = {}
             for name, hits in hits_by_name.items():
                 if isinstance(hits, tuple):
                     item[name] = self.joiner(hits[0], hits[1], position)
                 else:
                     item[name] = self.joiner(hits, i=position)
             items.append(item)

         if self.multiple_title is None:
             title = 'Items'
         else:
             title = self.multiple_title
         self.extracted[title] = items
     return self.extracted
Exemple #10
0
    def get_attr(self, xpath, attr_name, default='', namespaces=None):
        """
        Get an attribute value of the node addressed by an XPath

        @param {string} xpath - XPath search path
            Note: (1) when namespaces are given, their keys can be used as prefixes,
                e.g. 'real_person:actor'
                (2) an xPath that does not start with '/' is searched from the root node
        @param {string} attr_name - attribute name
        @param {string} default='' - value returned when the node or the attribute is not found
        @param {dict} namespaces=None - namespaces
            Example of an accepted value:
                ns = {
                    'real_person': 'http://people.example.com',
                    'role': 'http://characters.example.com'
                }

        @return {string} - the attribute value of the first matching node, or default
            when no node matches or the node lacks the attribute
        """
        if self.use_xpath2:
            # xpath2.0
            matches = elementpath.select(self.root,
                                         xpath,
                                         namespaces=namespaces)
        else:
            # xpath1.0
            matches = self.root.xpath(xpath, namespaces=namespaces)

        if len(matches) != 0:
            # Only the first match is consulted
            return matches[0].get(attr_name, default=default)
        return default
Exemple #11
0
    def check_selector(self, path, root, expected, namespaces=None, **kwargs):
        """
        Checks using the selector API, namely the *select* function at package level.

        :param path: an XPath expression.
        :param root: an Element or an ElementTree instance.
        :param expected: the expected result. Can be a data instance to compare
            to the result, a type to be used to check the type of the result,
            a function that accepts the result as argument and returns a
            boolean value, an exception class that is raised by running the
            selection.
        :param namespaces: an optional mapping from prefixes to namespace URIs.
        :param kwargs: other optional arguments for the parser class.
        """
        parser_class = self.parser.__class__
        expects_error = isinstance(expected, type) and issubclass(expected, Exception)
        if expects_error:
            self.assertRaises(expected, select, root, path, namespaces,
                              parser_class, **kwargs)
            return

        results = select(root, path, namespaces, parser_class, **kwargs)
        # The order of these checks matters: plain data is compared before
        # types and callables are considered.
        if isinstance(expected, list):
            self.assertListEqual(results, expected)
        elif isinstance(expected, set):
            self.assertEqual(set(results), expected)
        elif isinstance(expected, float) and math.isnan(expected):
            self.assertTrue(math.isnan(results))
        elif not callable(expected):
            self.assertEqual(results, expected)
        elif isinstance(expected, type):
            self.assertIsInstance(results, expected)
        else:
            self.assertTrue(expected(results))
    def get_default_value(self, xsd_element_node, xml_parent_node):
        # The attribute @default of the xsd:element node
        v = xsd_element_node.get("default")
        if v is not None:
            return v

        # The acrn:defaults and acrn:unique-among annotations which define a set of default values that shall be unique
        # among a collection of nodes
        annot_node = self.get_node(xsd_element_node, "xs:annotation")
        if annot_node is not None:
            defaults = annot_node.get("{https://projectacrn.org}defaults")
            unique_among = annot_node.get(
                "{https://projectacrn.org}unique-among")
            if defaults is not None and unique_among is not None:
                try:
                    default_values = set(eval(defaults))
                    existing_values = set(
                        elementpath.select(
                            self.xml_etree,
                            unique_among,
                            variables={"parent": xml_parent_node}))
                    available_defaults = default_values - existing_values
                    return sorted(list(available_defaults))[0]
                except:
                    pass

        return None
Exemple #13
0
    def evaluate_cda_file_Etree(self, xpath, cda_file):
        """Evaluate ``xpath`` against a CDA file and return the matches.

        The expression is evaluated with the HL7 v3 namespace as default
        namespace.  Element results are replaced by a fixed placeholder text;
        all other results are returned unchanged.

        Returns a one-element list naming the error when the expression is
        syntactically invalid, has a type error, or the file is missing, and
        an empty list when nothing matches.
        """
        namespaces = {'': 'urn:hl7-org:v3'}
        try:
            root = self.get_root_from_xml(self, cda_file=cda_file)
            results = elementpath.select(root, xpath, namespaces)
        except elementpath.exceptions.ElementPathSyntaxError:
            print(
                "Log: CDAEvaluator.evaluate_cda_file_Etree 'ElementPathSyntaxError'"
            )
            return ["ElementPathSyntaxError"]
        except elementpath.exceptions.ElementPathTypeError:
            print(
                "Log: CDAEvaluator.evaluate_cda_file_Etree 'ElementPathTypeError'"
            )
            return ["ElementPathTypeError"]
        except FileNotFoundError:
            print(
                "Log: CDAEvaluator.evaluate_cda_file_Etree 'FileNotFoundError'"
            )
            return ["FileNotFoundError"]

        if results is None or len(results) == 0:
            return []

        # info: elements show up when the xpath is not specific enough,
        # i.e. has no '@' specification at the end
        for position, entry in enumerate(results):
            if type(entry) is ET.Element:
                results[position] = "Ergebnisse gefunden (siehe ELGA Dokument)"
        return results
 def test_iter_siblings__issue_44(self):
     """Following siblings of the first node match lxml's own xpath() result."""
     path = 'node()[1]/following-sibling::node()'
     root = lxml_etree.XML(
         '<root>text 1<!-- comment -->text 2<!-- comment --> text 3</root>')
     selected = select(root, path)
     # Comments and text nodes are expected interleaved.
     self.assertListEqual(selected, [root[0], 'text 2', root[1], ' text 3'])
     self.assertListEqual(selected, root.xpath(path))
Exemple #15
0
    def set_value(self,
                  xpath,
                  value,
                  namespaces=None,
                  auto_create=True,
                  debug=False,
                  ignore_path_check=False):
        """
        Set the text of the nodes addressed by an XPath

        @param {string} xpath - XPath search path
            Note: (1) when namespaces are given, their keys can be used as prefixes,
                e.g. 'real_person:actor'
                (2) an xPath that does not start with '/' is searched from the root node
        @param {string} value - the value to set
        @param {dict} namespaces=None - namespaces
            Example of an accepted value:
                ns = {
                    'real_person': 'http://people.example.com',
                    'role': 'http://characters.example.com'
                }
        @param {bool} auto_create=True - create the node when it does not exist
        @param {bool} debug=False - print the arguments when an unexpected exception occurs
        @param {bool} ignore_path_check=False - skip the path check (operate directly)

        @throw {NameError} - raised when the node does not exist
        @throws {AttributeError} - raised when the search path does not meet the
            auto-create rules
        """
        try:
            if self.use_xpath2:
                # xpath2.0
                targets = elementpath.select(self.root,
                                             xpath,
                                             namespaces=namespaces)
            else:
                # xpath1.0
                targets = self.root.xpath(xpath, namespaces=namespaces)

            if len(targets) != 0:
                # Every matching node receives the value
                for target in targets:
                    target.text = value
            elif not auto_create:
                # Nothing matched and creation is disabled
                raise NameError("can't find node by xpath")
            else:
                # Nothing matched: try to create the node
                created = self.append_path_node(
                    xpath,
                    namespaces=namespaces,
                    ignore_path_check=ignore_path_check)
                created.text = value
        except BaseException:
            if debug:
                # Print the arguments, then let the exception propagate
                print(
                    'set_value error, para:[xpath=%s][value=%s][namespaces=%s][auto_create=%s]'
                    % (xpath, value, namespaces, auto_create))
            raise
Exemple #16
0
    def to_string(self, xpath=None, namespaces=None, **kwargs):
        """
        Serialize an xml node to a string

        @param {string} xpath=None - XPath search path, None means the root node
        @param {dict} namespaces=None - namespaces
        @param {**kwargs} kwargs - extended parameters, including the parameters of etree.tostring:
            method="xml"
            xml_declaration=None - whether to add the xml declaration, True - always, False - never
                None means the declaration is only added when the encoding is not
                US-ASCII, UTF-8 or Unicode
            pretty_print=True - whether to pretty-print the output
            with_tail=True
            standalone=None
            doctype=None
            exclusive=False
            inclusive_ns_prefixes=None
            with_comments=True
            strip_text=False

        @return {string} - the serialized string
        """
        if xpath is None:
            target = self.root
        else:
            if self.use_xpath2:
                # xpath2.0
                matches = elementpath.select(self.root,
                                             xpath,
                                             namespaces=namespaces)
            else:
                # xpath1.0
                matches = self.root.xpath(xpath, namespaces=namespaces)
            # Only the first match is serialized; None is passed on when nothing matched
            target = matches[0] if len(matches) > 0 else None

        # Serializer options with the value used when the caller does not override them
        fallbacks = {
            'method': "xml",
            'xml_declaration': None,
            'pretty_print': True,
            'with_tail': True,
            'standalone': None,
            'doctype': None,
            'exclusive': False,
            'inclusive_ns_prefixes': None,
            'with_comments': True,
            'strip_text': False,
        }
        options = {
            option: kwargs.get(option, fallback)
            for option, fallback in fallbacks.items()
        }
        serialized = ET.tostring(target, encoding=self.encoding, **options)
        return serialized.decode(encoding=self.encoding)
Exemple #17
0
def elementpath_lxml_path_with_predicate():
    """Select ``path1`` on the lxml metadata tree with the XPath 1.0 parser.

    Asserts that exactly one node is selected, that it is the expected node
    of the tree and that its ``Algorithm`` attribute equals ``value``.
    """
    matches = select(metadata_lxml_root,
                     path1,
                     namespaces,
                     parser=XPath1Parser)
    assert len(matches) == 1
    match = matches[0]
    assert match is metadata_lxml_root[5][2][1][9]
    assert match.get('Algorithm') == value
Exemple #18
0
 def get_date_created(self):
     """Return the document's ``effectiveTime`` as a timezone-aware datetime.

     Reads ``/ClinicalDocument/effectiveTime/@value`` and accepts two
     layouts: a 19-character value parsed as ``%Y%m%d%H%M%S%z`` (timestamp
     with UTC offset), or an 8-character value parsed as ``%Y%m%d`` and
     localized to UTC.

     Returns None when the attribute is missing, has any other length, or
     does not parse as a date.
     """
     xPath = """/ClinicalDocument/effectiveTime/@value"""
     values = elementpath.select(self.root, xPath, self.namespaces)
     if not values:
         # No effectiveTime present: report "unknown" instead of raising
         # IndexError.
         return None
     raw = values[0]
     try:
         if len(raw) == 19:
             return datetime.strptime(raw, '%Y%m%d%H%M%S%z')
         if len(raw) == 8:
             return pytz.utc.localize(datetime.strptime(raw, '%Y%m%d'))
     except ValueError:
         # Right length but not a valid date (e.g. month 13).
         return None
     return None
Exemple #19
0
    def get_reference_id_from_result(self, xpath):
        """Resolve the section codes referenced near the node(s) of ``xpath``.

        Any trailing ``/concat...`` part of ``xpath`` is dropped.  The path is
        then extended with ``/parent::*`` one step at a time until the
        addressed node has an ``entry`` parent, or until nothing is selected
        any more.  For each ``text/reference/@value`` found below that node,
        the leading ``#`` is removed and the ``code/@code`` of the section
        containing an element with that ``ID`` is looked up.

        Returns a list of ``'id' + code`` strings, or an empty list when no
        ``entry`` ancestor was found.
        """
        namespaces = {'': 'urn:hl7-org:v3'}
        hit = False
        while not hit:
            # if xpath contains "concat" remove everything after that
            concat_at = xpath.find('/concat')
            if concat_at > 0:
                xpath = xpath[:concat_at]

            xpath += '/parent::*'
            if len(elementpath.select(self.root, xpath, namespaces)) == 0:
                # Nothing left to climb to
                break
            try:
                hit = len(
                    elementpath.select(self.root, xpath + '/parent::entry',
                                       namespaces)) > 0
            except elementpath.exceptions.ElementPathSyntaxError:
                print("Syntax Error")
            except FileNotFoundError:
                print("File not Found!")
            except elementpath.exceptions.ElementPathTypeError:
                print("Path Error")
                break
            except RecursionError:
                print("RecursionError")

        if not hit:
            return []

        xpath += '//text/reference/@value'
        results = elementpath.select(self.root, xpath, namespaces)
        if results is not None and len(results) != 0:
            # Write back by position.  Looking the slot up with
            # results.index(entry) could hit an earlier slot that was already
            # rewritten to an equal value.
            for position, entry in enumerate(results):
                reference = str(entry.replace('#', ''))
                codes = elementpath.select(
                    self.root, "//component[section//*/@ID = '" +
                    reference + "']/section/code/@code", namespaces)
                # NOTE(review): raises IndexError when no section carries the
                # referenced ID -- confirm whether that can happen.
                results[position] = 'id' + str(codes[0])
        return results
Exemple #20
0
    def set_attr(self,
                 xpath,
                 attr_name,
                 value,
                 namespaces=None,
                 auto_create=True,
                 ignore_path_check=False):
        """
        Set an attribute on the nodes addressed by an XPath (the attribute is
        added to every existing matching node)

        @param {string} xpath - XPath search path
            Note: (1) when namespaces are given, their keys can be used as prefixes,
                e.g. 'real_person:actor'
                (2) an xPath that does not start with '/' is searched from the root node
        @param {string} attr_name - attribute name
        @param {string} value - the value to set
        @param {dict} namespaces=None - namespaces
            Example of an accepted value:
                ns = {
                    'real_person': 'http://people.example.com',
                    'role': 'http://characters.example.com'
                }
        @param {bool} auto_create=True - create the node when it does not exist
        @param {bool} ignore_path_check=False - skip the path check (operate directly)

        @throw {NameError} - raised when the node does not exist
        @throws {AttributeError} - raised when the search path does not meet the
            auto-create rules
        """
        if self.use_xpath2:
            # xpath2.0
            targets = elementpath.select(self.root,
                                         xpath,
                                         namespaces=namespaces)
        else:
            # xpath1.0
            targets = self.root.xpath(xpath, namespaces=namespaces)

        if len(targets) == 0:
            if not auto_create:
                # Nothing matched and creation is disabled
                raise NameError("can't find node by xpath")
            # Nothing matched: try to create the node and use it as the only target
            targets = [
                self.append_path_node(xpath,
                                      namespaces=namespaces,
                                      ignore_path_check=ignore_path_check)
            ]

        for target in targets:
            target.set(attr_name, value)
Exemple #21
0
    def check_current(self, parser_class):
        """Check the ``current()`` function with the given parser class.

        First, ``current()`` evaluated with each node of the document as
        context item must yield exactly that node.  Second, a predicate using
        ``current()/@ref`` must select the element whose ``id`` equals the
        ``ref`` attribute of the context item.
        """
        xml_doc = etree.parse(
            StringIO("""
        <root>
            <element id="a">value</element>
            <element id="b" ref="a">other value</element>
        </root>
        """))
        for node in xml_doc.iter():
            selected = select_with_context(xml_doc,
                                           node,
                                           "current()",
                                           parser=parser_class)
            self.assertEqual([node], selected)

        elements = select(xml_doc, "//element")
        node_a, _node_b = elements[0], elements[1]
        referencing = select(xml_doc, "//element[@ref]")[0]
        selected = select_with_context(xml_doc,
                                       referencing,
                                       "/root/element[@id=current()/@ref]",
                                       parser=parser_class)
        self.assertEqual([node_a], selected)
Exemple #22
0
 def get_context_elements(self, xml_document, rule_context, namespaces,
                          variables):
     """Collect the items selected by a rule context expression.

     The expression is always evaluated once on the document.  When it does
     not start with ``/``, it is additionally evaluated on every element
     yielded by ``xml_document.iter()`` and those results are appended.
     """
     matches = select(xml_document,
                      rule_context,
                      namespaces=namespaces,
                      variables=variables)
     if rule_context.startswith('/'):
         return matches

     # Relative context: also try it from each element of the document.
     context_selector = Selector(rule_context,
                                 namespaces=namespaces,
                                 variables=variables)
     for element in xml_document.iter():
         matches.extend(context_selector.select(element))
     return matches
Exemple #23
0
    def get_nodes(self, xpath, namespaces=None):
        """
        Get the list of nodes addressed by an XPath

        @param {string} xpath - XPath search path
        @param {dict} namespaces=None - namespaces

        @return {list} - the list of nodes (items are ET._Element objects)
        """
        if not self.use_xpath2:
            # xpath1.0
            return self.root.xpath(xpath, namespaces=namespaces)
        # xpath2.0
        return elementpath.select(self.root, xpath, namespaces=namespaces)
Exemple #24
0
    def type_of_element(self, type_tag, xsd_element_node, xml_node):
        """Return the XSD type node that applies to an element, or None.

        When ``xml_node`` is given, the first ``xs:alternative`` child of
        ``xsd_element_node`` whose ``test`` expression selects something on
        ``xml_node`` is used instead of the element itself.  The type is
        either a ``type_tag`` child of that node or, failing that, the node
        named by its ``type`` attribute, looked up in ``self.xsd_etree``.
        """
        source_node = xsd_element_node
        if xml_node is not None:
            alternatives = xsd_element_node.findall("xs:alternative", namespaces=self.xpath_ns)
            # Lazily pick the first alternative whose test matches.
            source_node = next(
                (candidate for candidate in alternatives
                 if elementpath.select(xml_node, candidate.get("test"))),
                xsd_element_node)

        inline_type = source_node.find(type_tag, namespaces=self.xpath_ns)
        if inline_type is not None:
            return inline_type

        type_name = source_node.get("type")
        if not type_name:
            return None
        return self.get_node(self.xsd_etree, f".//{type_tag}[@name='{type_name}']")
Exemple #25
0
    def findall(self, path, namespaces=None):
        """
        Finds all matching XSD/XML elements or attributes.

        :param path: is an XPath expression that considers the schema as the
            root element with global elements as its children.
        :param namespaces: an optional mapping from namespace prefix to full
            name; ``self.xpath_namespaces`` is used when it is empty or None.
        :return: a list containing all matching XSD/XML elements or
            attributes. An empty list is returned if there is no match.
        """
        # An absolute path is made relative by prefixing a dot.
        relative_path = '.{}'.format(path) if path.startswith('/') else path
        effective_namespaces = namespaces or self.xpath_namespaces
        return select(self, relative_path, effective_namespaces, strict=False)
Exemple #26
0
 def format_paths(unified_node, parent_map, report_on, variables):
     """Return a path string for each element selected by ``report_on``.

     ``report_on`` is evaluated on ``unified_node`` with the XPath 2.0
     parser.  For every selected element the chain of parents is followed
     through ``parent_map``.  A segment gets a 1-based ``[n]`` position only
     when its parent has more than one child with the same tag.
     """
     selected = elementpath.select(unified_node, report_on, variables = variables, parser = elementpath.XPath2Parser)
     formatted = []
     for node in selected:
         # Segments are collected leaf-first and reversed at the end.
         segments = []
         while node is not None:
             segment = node.tag
             parent = parent_map.get(node, None)
             if parent is not None:
                 same_tag = parent.findall(node.tag)
                 if len(same_tag) > 1:
                     segment += f"[{same_tag.index(node) + 1}]"
             segments.append(segment)
             node = parent
         formatted.append("/" + "/".join(reversed(segments)))
     return formatted
    def test_basic_example(self):
        """Every assertion of every rule in basic.sch holds for basic1_ok.xml."""
        schema = Schema(get_file("schematron", "basic.sch"))
        doc = etree.parse(get_file("xml", "basic1_ok.xml"))

        parser = XPath2Parser(schema.ns_prefixes, {})
        for pattern in schema.patterns.values():
            for rule in pattern.rules:
                # Each element selected by the rule context is checked
                # against all assertions of that rule.
                for element in select(doc, rule.context):
                    context = XPathContext(root=doc, item=element)
                    for assertion in rule.assertions:
                        outcome = parser.parse(assertion.test).evaluate(context)
                        self.assertTrue(outcome, assertion.to_string())
Exemple #28
0
def main() -> None:
    """Print the result of the ``*`` expression via four elementpath entry points.

    The module-level ``select`` and ``iter_select`` functions are used
    first, then the same two methods of a reusable ``Selector`` object.
    """
    from xml.etree.ElementTree import XML
    import elementpath

    root = XML('<a><b1/><b2><c1/><c2/></b2><b3/></a>')

    # Module-level functions
    print(elementpath.select(root, '*'))
    print(list(elementpath.iter_select(root, '*')))

    # Selector object bound to the expression
    selector = elementpath.Selector('*')
    print(selector.select(root))
    print(list(selector.iter_select(root)))
Exemple #29
0
def construct_complex_xpath(type: str, is_lineref: bool, single: bool,
                            *args: str, tree: ElementTree.ElementTree,
                            **kwargs) -> List[str]:
    """Build an XPath ``concat(...)`` query from named paths and evaluate it.

    Each name in ``args`` is looked up in the module-level ``paths`` table,
    whose entries provide the xpath at index 0 and its prefix at index 1.
    The xpaths are joined inside a ``concat()`` call, separated by the
    ``separator`` keyword (default ``' # '``).  When every prefix is the same
    it is placed once in front of ``concat``; otherwise each xpath is
    qualified with its own prefix (minus the first character) inside it.

    :param type: ``'StopEvent'`` or ``'Trip'``.
    :param is_lineref: forwarded to ``construct_simple_xpath``.
    :param single: forwarded to ``construct_simple_xpath``.
    :param args: names of the entries in ``paths`` to combine.
    :param tree: the tree the final expression is evaluated against.
    :param kwargs: forwarded to ``construct_simple_xpath``; ``separator`` and
        ``namespaces`` are also read here.
    :return: the result of evaluating the constructed expression.
    :raises NotImplementedError: if no names are given or ``type`` is unknown.
    """
    if not args:
        raise NotImplementedError(
            'Tried to run construct_complex_xpath without xpath names')
    if type not in ('StopEvent', 'Trip'):
        raise NotImplementedError('Unknown type ' + type +
                                  ' at construct_complex_xpath.')

    is_stop_event: bool = type == 'StopEvent'

    entries = [(paths[name][0], paths[name][1]) for name in args]
    leaf_paths: List[str] = [entry[0] for entry in entries]
    parent_paths: List[str] = [entry[1] for entry in entries]

    separator: str = ", '" + kwargs.get('separator', ' # ') + "', "

    extension: str
    shared_parent = parent_paths[0]
    if all(parent == shared_parent for parent in parent_paths):
        # One common prefix: hoist it in front of the concat() call.
        extension = shared_parent + '/concat(' + separator.join(leaf_paths) + ')'
    else:
        # Mixed prefixes: qualify every argument individually.
        qualified = [
            parent[1:] + '/' + leaf
            for parent, leaf in zip(parent_paths, leaf_paths)
        ]
        extension = '/concat(' + separator.join(qualified) + ')'

    if is_stop_event:
        extension = extension.replace(timed_leg[1:], this_call[1:])

    xpath = construct_simple_xpath(not is_stop_event, is_lineref, single,
                                   extension, **kwargs)
    elementtree_kwargs: Dict[str, Any] = {
        'namespaces': kwargs.get('namespaces')
    }
    if elementpath_concat_fixed:
        return select(tree, xpath, **elementtree_kwargs)
    return concat_not_working_workaround(xpath, tree, **elementtree_kwargs)
Exemple #30
0
    def remove(self, xpath, namespaces=None, hold_tail=False):
        """
        Delete the nodes matched by an XPath expression

        @param {string} xpath - search path written in XPath syntax

        @param {dict} namespaces=None - namespace mapping
        @param {bool} hold_tail=False - whether to keep the tail information of the previous node
        """
        if not self.use_xpath2:
            # XPath 1.0: use the root node's own xpath() method
            _matched = self.root.xpath(xpath, namespaces=namespaces)
        else:
            # XPath 2.0: evaluate through elementpath
            _matched = elementpath.select(self.root,
                                          xpath,
                                          namespaces=namespaces)

        for _target in _matched:
            self.remove_node(_target, hold_tail=hold_tail)
Exemple #31
0
    def iter_decode(self, source, path=None, validation='lax', process_namespaces=True,
                    namespaces=None, use_defaults=True, decimal_type=None, datetime_types=False,
                    converter=None, defuse=None, timeout=None, **kwargs):
        """
        Creates an iterator for decoding an XML source to a data structure.

        This is a generator function: the checks below, including the raised \
        exceptions, run when iteration starts, not when the method is called.

        :param source: the XML data source. Can be a path to a file or a URI of a resource or \
        an opened file-like object or an Element Tree instance or a string containing XML data.
        :param path: is an optional XPath expression that matches the parts of the document \
        that have to be decoded. The XPath expression considers the schema as the root \
        element with global elements as its children.
        :param validation: defines the XSD validation mode to use for decode, can be 'strict', \
        'lax' or 'skip'.
        :param process_namespaces: indicates whether to use namespace information in the decoding \
        process, using the map provided with the argument *namespaces* and the map extracted from \
        the XML document.
        :param namespaces: is an optional mapping from namespace prefix to URI.
        :param use_defaults: indicates whether to use default values for filling missing data.
        :param decimal_type: conversion type for `Decimal` objects (generated by XSD `decimal` \
        built-in and derived types), useful if you want to generate a JSON-compatible data structure.
        :param datetime_types: if set to `True` the datetime and duration XSD types are decoded, \
        otherwise their origin XML string is returned.
        :param converter: an :class:`XMLSchemaConverter` subclass or instance to use for the decoding.
        :param defuse: Overrides when to defuse XML data. Can be 'always', 'remote' or 'never'.
        :param timeout: Overrides the timeout set for the schema.
        :param kwargs: Keyword arguments containing options for converter and decoding.
        :return: Yields a decoded data object, possibly preceded by a sequence of validation \
        or decoding errors.
        :raises XMLSchemaNotBuiltError: if the schema is not built.
        :raises XMLSchemaValueError: if the schema has no element declarations.
        """
        if not self.built:
            raise XMLSchemaNotBuiltError(self, "schema %r is not built." % self)
        elif not self.elements:
            raise XMLSchemaValueError("decoding needs at least one XSD element declaration!")

        if not isinstance(source, XMLResource):
            # Raw source: wrap it, falling back to the schema's own defuse/timeout settings.
            defuse = defuse or self.defuse
            timeout = timeout or self.timeout
            source = XMLResource(source=source, defuse=defuse, timeout=timeout, lazy=False)
        # `and` binds tighter than `or`: copy the resource when either override is
        # given and differs from the value the resource already carries.
        elif defuse and source.defuse != defuse or timeout and source.timeout != timeout:
            source = source.copy(defuse=defuse, timeout=timeout, lazy=False)

        if process_namespaces:
            # Work on a copy so the caller's mapping is not mutated, then merge in
            # the resource's namespaces (these overwrite same-prefix entries).
            namespaces = {} if namespaces is None else namespaces.copy()
            namespaces.update(source.get_namespaces())
        else:
            namespaces = {}

        converter = self.get_converter(converter, namespaces, **kwargs)

        if path is None:
            # No path given: decode the whole document starting from its root,
            # which has to resolve to an XsdElement of this schema.
            xsd_element = self.find(source.root.tag, namespaces=namespaces)
            if not isinstance(xsd_element, XsdElement):
                reason = "%r is not a global element of the schema!" % source.root.tag
                yield XMLSchemaValidationError(self, source.root, reason, source, namespaces)
            else:
                for obj in xsd_element.iter_decode(
                        source.root, validation, converter, source=source, namespaces=namespaces,
                        use_defaults=use_defaults, decimal_type=decimal_type,
                        datetime_types=datetime_types, **kwargs):
                    yield obj
        else:
            # The same path is resolved twice: against the schema to find the XSD
            # element, and against the document to find the nodes to decode.
            xsd_element = self.find(path, namespaces=namespaces)
            if not isinstance(xsd_element, XsdElement):
                reason = "the path %r doesn't match any element of the schema!" % path
                # Report against what the path selects in the document, or against
                # the root when the selection result is empty/falsy.
                obj = elementpath.select(source.root, path, namespaces=namespaces) or source.root
                yield XMLSchemaValidationError(self, obj, reason, source, namespaces)
            else:
                # Decode each selected node with the XSD element matched by the path.
                for elem in elementpath.select(source.root, path, namespaces=namespaces):
                    for obj in xsd_element.iter_decode(
                            elem, validation, converter, source=source, namespaces=namespaces,
                            use_defaults=use_defaults, decimal_type=decimal_type,
                            datetime_types=datetime_types, **kwargs):
                        yield obj