Ejemplo n.º 1
0
    def _report_status_checks(self, processed_report_doc: etree._ElementTree,
                              embedded: bool):
        """Run the inherited checks, then sanity-check a non-embedded report.

        For a non-embedded report the nodes below ``/Report/Main`` are counted:
        a count under 3 is rejected as an empty report, a count under 4 only
        triggers a hint about adding more elements. A second hint is shown
        when no ``Text`` node exists below any ``Page``.

        Args:
            processed_report_doc: The processed report document to inspect.
            embedded: When true, nothing beyond the inherited checks is done.

        Raises:
            InvalidReportError: If fewer than 3 nodes sit below
                ``/Report/Main``.
        """
        super()._report_status_checks(processed_report_doc, embedded)

        # check for any unsupported local features, e.g. DataTable
        # NOTE - we could eventually have different validators for local and uploaded reports
        if embedded:
            return

        # TODO - validate at least a single element
        node_count = processed_report_doc.xpath("count(/Report/Main//*)")
        if node_count < 3:
            raise InvalidReportError(
                "Empty report - must contain at least one asset/block")
        if node_count < 4:
            url = "https://docs.datapane.com/reports/blocks/layout-pages-and-selects"
            plain_hint = f"Your report only contains a single element - did you know you can include additional plots, tables and text in a report? Check out {url} for more info"
            markdown_hint = f"Your report only contains a single element - did you know you can include additional plots, tables and text in a report? Check out [the docs]({url}) for more info"
            display_msg(text=plain_hint, md=markdown_hint)

        if processed_report_doc.xpath("boolean(/Report/Main/Page//Text)"):
            return
        display_msg(
            "Your report doesn't contain any text - consider using TextReport to upload assets and add text to your report from your browser"
        )
Ejemplo n.º 2
0
def get_clusters(tree: etree._ElementTree) -> ty.Dict[str, ty.Set[str]]:
    """Map each coreference schema, and each unlinked mention, to mention ids.

    Links of the first schema link group are read first; every mention span
    whose id is not referenced by any link is then added as a cluster holding
    only itself.

    Args:
        tree: Document holding the coreference annotation.

    Returns:
        A dict from cluster id to the set of mention ids it contains.

    Raises:
        Exception: If two distinct clusters share at least one mention id.
    """
    schema_groups = tree.xpath(
        './tei:standOff/tei:annotation[@tei:type="coreference"]/tei:linkGrp[@tei:type="schema"]',
        namespaces=NSMAP,
    )
    first_group = schema_groups[0]

    mention_spans = tree.xpath(
        './tei:standOff/tei:annotation[@tei:type="coreference"]'
        '/tei:spanGrp[@tei:subtype="mention"]/tei:span',
        namespaces=NSMAP,
    )

    clusters = {}
    for link in first_group.iter(f"{TEI}link"):
        references = link.attrib[f"{TEI}target"].split()
        # Each reference carries a one-character prefix that is dropped.
        members = {reference[1:] for reference in references}
        clusters[link.attrib[f"{XML}id"]] = members

    linked_ids = set().union(*clusters.values())
    for span in mention_spans:
        span_id = span.attrib[f"{XML}id"]
        if span_id not in linked_ids:
            clusters[span_id] = {span_id}

    # Every ordered pair of distinct clusters must be disjoint.
    for a_id, a in clusters.items():
        for b_id, b in clusters.items():
            if b is a:
                continue
            intersect = a.intersection(b)
            if intersect:
                raise Exception(
                    f"Schemas {a_id} and {b_id} are not disjoints: {intersect}")
    return clusters
Ejemplo n.º 3
0
def save_xml(tree: et._ElementTree, fn: str):
    """Serialise ``tree`` to ``fn`` and patch the written text in place.

    After the indented tree is written, the file is read back and two literal
    substitutions are applied to its text: the bare ``<TEI>`` tag gains the
    TEI namespace declaration and every ``verse=`` becomes ``xml:id=``.

    Args:
        tree: Tree to serialise (it is re-indented in place).
        fn: Destination path.

    Returns:
        ``fn``, unchanged.
    """
    et.indent(tree, '    ')
    tree.write(fn, pretty_print=True, encoding='utf-8')

    with open(fn, 'r', encoding='utf-8') as source:
        serialised = source.read()

    patched = serialised.replace(
        '<TEI>', '<TEI xmlns="http://www.tei-c.org/ns/1.0">'
    ).replace('verse=', 'xml:id=')

    with open(fn, 'w', encoding='utf-8') as sink:
        sink.write(patched)
    return fn
Ejemplo n.º 4
0
def _get_cib_version(
    cib: _ElementTree, attribute: str, regexp: Pattern
) -> Version:
    """Read a version number from an attribute of the tree's root element.

    Args:
        cib: Tree whose root element carries the attribute.
        attribute: Name of the attribute holding the version string.
        regexp: Pattern with named groups ``major``, ``minor`` and ``rev``.

    Returns:
        A ``Version`` built from the matched groups; the revision is ``None``
        when the ``rev`` group is empty or did not take part in the match.

    Raises:
        LibraryError: If the attribute is missing or does not match
            ``regexp``.
    """

    def bad_format(reason):
        # Both failure modes are reported through the same error wrapper.
        return LibraryError(
            ReportItem.error(reports.messages.CibLoadErrorBadFormat(reason))
        )

    raw_value = cib.getroot().get(attribute)
    if raw_value is None:
        raise bad_format(
            f"the attribute '{attribute}' of the element 'cib' "
            "is missing"
        )

    parsed = regexp.match(raw_value)
    if not parsed:
        raise bad_format(
            f"the attribute '{attribute}' of the element 'cib' has "
            f"an invalid value: '{raw_value}'"
        )

    major = int(parsed.group("major"))
    minor = int(parsed.group("minor"))
    revision = int(parsed.group("rev")) if parsed.group("rev") else None
    return Version(major, minor, revision)
def generate_session_class(
    omc_interface_xml: etree._ElementTree,
) -> Code:
    """Generate the source of the ``OMCSession`` class.

    The class body gets one alias line per non-package class listed under the
    node whose id is ``OpenModelica.Scripting``. Aliases for elements that
    ``is_supported_element`` rejects are emitted as comments.

    Args:
        omc_interface_xml: Interface description to read the classes from.

    Returns:
        The ``Code`` object for the whole class definition.
    """
    members = Code()
    session_class = Code(
        "class OMCSession(",
        CodeWithIndent(
            "OMCSessionBase,",
        ),
        "):",
        CodeWithIndent(
            members
        )
    )

    members.append("OpenModelica = OpenModelica")
    # Single-target unpacking: exactly one matching node is required.
    scripting_node, = omc_interface_xml.xpath(
        '//*[@id="OpenModelica.Scripting"]'
    )
    for class_node in scripting_node.xpath('./classes/*'):
        if class_node.tag == "package":
            continue

        type_name = TypeName(class_node.attrib["id"])
        prefix = "" if is_supported_element(class_node) else "# "
        members.append(
            f"{prefix}{type_name.last_identifier} = {type_name}"
        )

    return session_class
Ejemplo n.º 6
0
    def _collect_action_list(et: etree._ElementTree, action_list_name: str) \
            -> Tuple[Union[None, etree._Element], Union[None, etree._Element], List[Union[None, etree._Element]]]:
        """Look up an action list together with its condition and state objects.

        Args:
            et: Tree to search.
            action_list_name: ``name`` attribute of the wanted ``actionList``;
                also the text of the ``actionListName`` node identifying the
                condition.

        Returns:
            ``(action_list, condition, state_objects)``. When the action list
            or its condition cannot be found the result is
            ``(None, None, [None])``.
        """
        al_elem = et.find(f"*actionList[@name='{action_list_name}']")
        # xpath() yields a (possibly empty) list. Indexing it unconditionally
        # raised IndexError and made the "not found" branch below unreachable.
        conditions = et.xpath(f"*/condition/actionListName[text()='{action_list_name}']/..")
        condition = conditions[0] if conditions else None

        if al_elem is None or condition is None:
            return None, None, [None]

        # Collect affected state objects
        state_objects = []
        for state_object_name in condition.xpath("stateCondition/stateObjectName"):
            state_object = et.find(f"*stateObject[@name='{state_object_name.text}']")
            if state_object is not None:
                state_objects.append(state_object)

        return al_elem, condition, state_objects
Ejemplo n.º 7
0
def validate(tree: etree._ElementTree, raise_: bool = False) -> bool:
    """Check a UBL document against the schema registered for its root tag.

    The UBL version is read from the document (``"2.0"`` when absent); for
    versions above 2.1 a warning is issued and validation is attempted anyway.

    Args:
        tree: The document to validate.
        raise_: When true, failures are raised instead of returned.

    Returns:
        True when the document validates. False when it does not, or when no
        schema is available; both only happen when ``raise_`` is false.

    Raises:
        KeyError: ``raise_`` is true and no schema exists for the root tag.
        Validation failure - see https://lxml.de/validation.html#xmlschema
    """
    # We find the appropriate XSD file
    version_nodes = ubl_version_finder_xp(tree)
    declared_version = (version_nodes[0].text.strip()
                        if len(version_nodes) > 0 else "2.0")
    version_parts = tuple(int(part) for part in declared_version.split("."))
    if version_parts > (2, 1):
        warnings.warn(
            f"We cannot validate UBL {declared_version} documents. Trying anyway")

    root = tree.getroot()
    schema = get_schema(root.tag)
    if schema is not None:
        if not raise_:
            return schema.validate(tree)
        schema.assertValid(tree)
        return True

    if raise_:
        raise KeyError(f"No schema available for root tree {root.tag}")
    return False
Ejemplo n.º 8
0
def _parse_element_tree(element_tree: etree._ElementTree) -> List[Node]:
    """Extract an interface element from an ElementTree if present.

    The tree is validated against ``interface_schema()`` before its root is
    handed to ``_parse_interface``.

    Args:
        element_tree: Parsed XML tree to operate on.

    Returns:
        Whatever ``_parse_interface`` produces for the root element.

    Raises:
        errors.InvalidNoDLDocumentError: If the tree fails schema validation.
    """
    try:
        interface_schema().assertValid(element_tree)
    except etree.DocumentInvalid as e:
        # Chain explicitly so the traceback shows the schema failure as the
        # direct cause rather than as an error raised during handling.
        raise errors.InvalidNoDLDocumentError(e) from e
    return _parse_interface(element_tree.getroot())
Ejemplo n.º 9
0
def targets_from_span(
    span: etree._Element, getter: ty.Callable[[str], etree._Element]
) -> ty.List[etree._Element]:
    """Return the list of the token elements covered by ``span``.

    A span either lists its tokens in a ``target`` attribute, or delimits a
    range with ``from``/``to`` attributes. In the second case the siblings of
    the start token matching ``TOKEN_TAGS`` are collected up to and including
    the end token.

    Args:
        span: The span element to resolve (an element, not a whole tree).
        getter: Callable returning the element for an id. A ``KeyError``
            raised by it is reported as ``ElementNotFoundError``.

    Returns:
        The targeted elements, in document order for the range form.

    Raises:
        ElementNotFoundError: If a listed target, the start element or the
            end element cannot be found.
        KeyError: If the span has no ``target`` and lacks ``from`` or ``to``.
    """
    span_id = xmlid(span)
    target = span.get(f"{TEI}target")
    if target is not None:
        try:
            return [getter(target_to_id(i)) for i in target.split()]
        except KeyError as e:
            raise ElementNotFoundError(
                f"Element targetted by span {span_id} not found", e.args[0]
            ) from e

    start_id = target_to_id(span.attrib[f"{TEI}from"])
    end_id = target_to_id(span.attrib[f"{TEI}to"])
    try:
        start_node = getter(start_id)
    except KeyError as e:
        raise ElementNotFoundError(
            f"Span {span_id} start element not found", start_id
        ) from e
    targets = [start_node]
    if start_id != end_id:
        last_node = start_node
        siblings = iter(start_node.itersiblings(*TOKEN_TAGS))
        try:
            while xmlid(last_node) != end_id:
                last_node = next(siblings)
                targets.append(last_node)
        except StopIteration as e:
            # Siblings ran out before the end id was seen; chain like the
            # other lookups in this function do.
            raise ElementNotFoundError(
                f"Span {span_id} end element not found", end_id
            ) from e
    return targets
Ejemplo n.º 10
0
def gen_tasks(tree: _ElementTree) -> Iterator[Task]:
    """Yield a ``Task`` for every ``body/ul/li`` node below the root of ``tree``.

    Each node is converted with ``node_to_dict``; a truthy
    ``'Recurrence info'`` entry is turned into a ``Recurrence``.

    Args:
        tree: Parsed document to read the task nodes from.

    Yields:
        One ``Task`` per task node, in document order.
    """
    # A leading "/" makes findall() on a tree emit a FutureWarning and is
    # treated as "./" anyway, so the relative form is spelled out explicitly.
    for task_node in tree.findall('./body/ul/li'):
        task_dict = dict(node_to_dict(task_node))
        recurrence = None
        if task_dict.get('Recurrence info'):
            recnode = cast(Dict[str, str], task_dict['Recurrence info'])
            recurrence = Recurrence(
                frequency=cast(Frequency, recnode['Frequency']),
                start=ensure(parse_timestamp_ms(recnode['Start'])),
                end=ensure(parse_timestamp_ms(recnode['End'])),
                hour=int(recnode['Hour of day to fire']),
                # A missing (or zero) interval falls back to 1.
                every=maybe(recnode.get('Every'), int) or 1,
                weekday_num=maybe(recnode.get('Weekday number'), int),
                day_of_month=maybe(recnode.get('Day number of month'),
                                   parse_day_num),
                day_of_week=maybe(recnode.get('Day of week'),
                                  lambda x: cast(Weekday, x)),
                month=maybe(recnode.get('Month of year'),
                            lambda x: cast(Month, x)),
            )
        simple_fields = cast(Dict[str, str], task_dict)
        task = Task(title=simple_fields['Title'],
                    created=ensure(
                        parse_timestamp_ms(simple_fields['Created time'])),
                    state=cast(State, simple_fields['State']),
                    due=maybe(simple_fields.get('Due date'),
                              parse_timestamp_ms),
                    recurrence=recurrence)
        # NOTE(review): printing from a generator looks like debug residue;
        # kept because callers may rely on the output - confirm and remove.
        print(task)
        yield task
Ejemplo n.º 11
0
def possible_smx_tags(lang1: str, pos: str,
                      tree: _ElementTree) -> Iterator[Tuple[str, List[str]]]:
    """Pair lemmas of the other language with the semtags of their lang1 lemma.

    Args:
        lang1: the language the semtags are fetched for.
        pos: part of speech of the lemmas.
        tree: an etree holding the content of an apertium bidix file.

    Yields:
        A tuple of the right-hand lemma and the sorted semtags of the
        matching left-hand lemma. Pairs without semtags, or whose right-hand
        side has no text, are skipped.
    """
    # TODO: Merge semtags
    # Lemma -> semtags lookup for lang1
    tags_by_lemma = dict(lang_tags(lang1, pos))

    # Every bidix symbol whose n attribute equals pos
    for symbol in tree.xpath(f'.//p/l/s[@n="{pos}"]'):
        # s -> l -> p: the enclosing pair element
        pair = symbol.getparent().getparent()
        lemma_tags = tags_by_lemma.get(pair.find('l').text)
        if not lemma_tags:
            continue
        other_lemma = pair.find('r').text
        if other_lemma is not None:
            yield (other_lemma, sorted(lemma_tags))
Ejemplo n.º 12
0
    def parse_bundle_relations(self, xml_tree: etree._ElementTree) -> dict:
        """Collect bundle relations, grouped by the id found in ``target``.

        Every ``rel_*_bundle`` / ``rel_bundle_*`` element is inspected; the
        endpoints are read from its first ``mxCell`` descendant.

        Args:
            xml_tree: Tree to search for relation elements.

        Returns:
            A dict mapping ``int(target)`` to a list of
            ``{"destination": int(source)}`` entries.
        """
        relation_xpath = """
        //rel_abstract_bundle |
        //rel_bundle_abstract |
        //rel_concrete_bundle |
        //rel_bundle_concrete
        """

        source_xpath = ".//mxCell"

        relations = {}
        for relation in xml_tree.xpath(relation_xpath):
            relation_dict = dict(relation.items())
            cell = relation.find(source_xpath)
            if cell is None:
                # Without an mxCell there are no endpoints to read; skip the
                # relation instead of failing on ``None.items()``.
                continue
            relation_source_dict = dict(cell.items())

            if relation_dict and relation_source_dict:
                source_id = int(relation_source_dict["target"])
                relations.setdefault(source_id, []).append(
                    {
                        "destination": int(relation_source_dict["source"]),
                    }
                )

        return relations
Ejemplo n.º 13
0
 def find_urls(self, tree: etree._ElementTree) -> Iterator[str]:
     """Generate the link attribute values of the nodes in C{tree}.

     Every node reachable from the root is visited; for each attribute name
     returned by C{link_attrs_for_node} for the node's tag, the attribute
     value is yielded when the node carries that attribute.
     """
     for node in tree.getroot().iter():
         for name in self.link_attrs_for_node(node.tag):
             if name in node.attrib:
                 yield cast(str, node.attrib[name])
Ejemplo n.º 14
0
    def xmlGetTextNodes(self, doc: etree._ElementTree, xpath: str,
                        namespaces: dict):
        """Join the text results of an xpath query into one string.

        :param lxml.etree._ElementTree doc: XML element to query
        :param str xpath: Xpath expression selecting the text nodes
        :param dict namespaces: XML namespaces like `lxml.etree.getroot().nsmap`
        :return: the matches separated by ``", "``
        """
        text_nodes = doc.xpath(xpath, namespaces=namespaces)
        separator = ", "
        return separator.join(text_nodes)
Ejemplo n.º 15
0
def get_mentions(tree: etree._ElementTree,) -> ty.Dict[ty.Tuple[str, str], Mention]:
    """Extract the mentions from an ANCOR-TEI document.

    Args:
        tree: The document to read the mention spans from.

    Returns:
        A dict mapping ``(first target id, last target id)`` to the mention.
        Spans that cannot be parsed, or whose type is not in
        ``MENTION_TYPES``, are logged and left out.

    Raises:
        ValueError: If the tree has no mention spans or no ``text`` element.
    """
    mentions = tree.xpath(
        (
            './tei:standOff/tei:annotation[@tei:type="coreference"]'
            '/tei:spanGrp[@tei:subtype="mention"]/tei:span'
        ),
        namespaces=NSMAP,
    )
    if not mentions:
        raise ValueError("`tree` has no mention spans")

    features = get_fs(tree)

    texts_lst = tree.findall(f"{TEI}text")
    if not texts_lst:
        raise ValueError(
            "Attempting to extract mentions from a document without a text"
        )

    tokens_id_store = {
        xmlid(elt): elt for text in texts_lst for elt in text.iter(*TOKEN_TAGS)
    }

    res = {}
    for m_elt in mentions:
        try:
            m = Mention.from_urs(m_elt, tokens_id_store.get, features.get)
        except ValueError as e:
            # Report the span being parsed: ``m`` is unbound on the first
            # iteration and otherwise still refers to the previous mention.
            logger.warning(f"Skipping span {xmlid(m_elt)}: {e}")
            continue
        if m.span_type not in MENTION_TYPES:
            if m.span_type in IGNORED_MENTION_TYPES:
                logger.debug(
                    f"Ignoring span {m.identifier!r} with mention type {m.span_type!r}"
                )
            else:
                logger.warning(
                    f"Span {m.identifier!r} has an invalid mention type ({m.span_type!r})"
                )
            continue
        res[(xmlid(m.targets[0]), xmlid(m.targets[-1]))] = m
    return res
Ejemplo n.º 16
0
 def unpack_element(
     tree: etree._ElementTree,
     element: Union[etree._Element, etree._ElementUnicodeResult, Any]
 ) -> Tuple[str, str]:
     """Returns path in the tree and string representation for the given XPath query element.

     For an element the path is the element's own path and the text is its
     pretty-printed serialisation. For any other result the text is
     ``str(element)`` and the path is that of the parent element, or ``''``
     when there is no parent to report.
     """
     if isinstance(element, etree._Element):
         path = tree.getpath(element)
         text = etree.tostring(element,
                               encoding='unicode',
                               pretty_print=True)
         return (path, text)

     text = str(element)
     try:
         parent = element.getparent()
     except AttributeError:
         # Plain values (numbers, booleans, ...) have no getparent().
         parent = None
     # Strings built by XPath functions such as string()/concat() have no
     # origin element, so getparent() returns None; getpath(None) would fail.
     path = '' if parent is None else tree.getpath(parent)
     return (path, text)
Ejemplo n.º 17
0
def parse_additional_resources(etree: ET) -> Tuple[Tuple[str, str], ...]:
    """Parse the additional resources into ``(text, href)`` pairs.

    One pair is produced per ``p`` node below the node whose id is
    ``additional-resources``: the cleaned text content of the paragraph and
    the result of ``first`` applied to the ``href`` values of its links.
    """
    paragraphs = etree.xpath("//*[@id='additional-resources']//p")
    return tuple(
        (clean(paragraph.text_content()), first(paragraph.xpath(".//a/@href")))
        for paragraph in paragraphs
    )
Ejemplo n.º 18
0
    def parse_root(self, xml_tree: etree._ElementTree) -> dict:
        """Return id and name of the first ``root`` element that has attributes.

        Args:
            xml_tree: Tree to search for ``root`` elements.

        Returns:
            ``{"id": int, "name": str}`` taken from the ``id`` and ``label``
            attributes.

        Raises:
            ValueError: If no ``root`` element with attributes exists.
        """
        attribute_sets = (dict(node.items()) for node in xml_tree.xpath("//root"))
        chosen = next((attrs for attrs in attribute_sets if attrs), None)
        if chosen is None:
            raise ValueError()

        return {
            "id": int(chosen["id"]),
            "name": chosen["label"],
        }
Ejemplo n.º 19
0
def parent_map(element_tree: etree._ElementTree) -> dict:
    """Build a lookup from every child element of the tree to its parent.

    Args:
        element_tree (etree._ElementTree): XML tree produced by the lxml parser.

    Returns:
        dict: Mapping of the form {child: parent, child2: parent2, ...} where
        both keys and values are element objects of the tree. The root has no
        entry because it has no parent.
    """
    child_to_parent = {}
    for parent in element_tree.iter():
        for child in parent:
            child_to_parent[child] = parent
    return child_to_parent
Ejemplo n.º 20
0
def _parse_element_tree(element_tree: etree._ElementTree) -> List[Node]:
    """Extract an interface element from an ElementTree if present.

    :param element_tree: parsed xml tree to operate on
    :type element_tree: etree._ElementTree
    :raises InvalidNoDLDocumentError: if tree does not adhere to schema
    :return: List of NoDL nodes present in the xml tree.
    :rtype: List[Node]
    """
    try:
        interface_schema().assertValid(element_tree)
    except etree.DocumentInvalid as e:
        # Chain explicitly so the traceback shows the schema failure as the
        # direct cause rather than as an error raised during handling.
        raise InvalidNoDLDocumentError(e) from e
    return _parse_interface(element_tree.getroot())
Ejemplo n.º 21
0
def get_fs(tree: etree._ElementTree) -> ty.Dict[str, FeatureStructure]:
    """Find and parse all the feature structures in `tree`.

    Returns
    -------
    A dict mapping feature structure ids to their parsed contents.

    Raises
    ------
    ElementNotFoundError
        If the tree contains no feature structure element.
    """
    structures = tree.xpath("//tei:fs", namespaces=NSMAP)
    if structures:
        return {xmlid(node): parse_fs(node) for node in structures}

    raise ElementNotFoundError(
        "There are no feature structure elements in this tree"
    )
def generate_module_py(
    omc_interface_xml: etree._ElementTree,
) -> Code:
    """Assemble the generated module from its sections.

    The sections are, in order: the import statements, the nested class code
    built from every node carrying an ``id`` attribute, and the session
    class, separated by blank lines.

    Args:
        omc_interface_xml: Interface description the code is generated from.

    Returns:
        The ``Code`` object for the whole module.
    """
    import_section = generate_import_statements()
    gap_after_imports = empty_line * 2
    identified_nodes = omc_interface_xml.xpath('//*[@id]')
    class_section = generate_nested_modelica_class(identified_nodes).to_code()
    gap_after_classes = empty_line * 2
    session_section = generate_session_class(
        omc_interface_xml,
    )
    return Code(
        empty_line,
        import_section,
        gap_after_imports,
        class_section,
        gap_after_classes,
        session_section,
    )
Ejemplo n.º 23
0
    def xmlGetTextTag(self, doc: etree._ElementTree, xpath: str,
                      namespaces: dict, key: str):
        """Read an attribute from the first node matching an xpath.

        :param lxml.etree._ElementTree doc: XML element to parse
        :param str xpath: Xpath to reach
        :param dict namespaces: XML namespaces like 'lxml.etree.getroot().nsmap'
        :param key : XML key to find like 'codeListValue'
        :return: the attribute value of the first match (``None`` when the
            attribute is absent), or the string ``"None"`` when nothing matches
        """
        matches = doc.xpath(xpath, namespaces=namespaces)
        if len(matches) == 0:
            return "None"
        return matches[0].get(key, None)
Ejemplo n.º 24
0
    def parse_features(self, xml_tree: etree._ElementTree) -> dict:
        """Collect the ``concrete`` and ``abstract`` elements as features.

        Elements without attributes, and elements whose ``id`` contains
        ``FEATURE_CLON_SUFIX``, are left out.

        Args:
            xml_tree: Tree to search for feature elements.

        Returns:
            A dict mapping each feature id (as int) to
            ``{"id": <id>, "name": <label attribute>}``.
        """
        xpath = """
        //concrete |
        //abstract
        """

        features = {}
        for node in xml_tree.xpath(xpath):
            attributes = dict(node.items())
            if not attributes or FEATURE_CLON_SUFIX in attributes["id"]:
                continue
            numeric_id = int(attributes["id"])
            features[numeric_id] = {
                "id": numeric_id,
                "name": attributes["label"],
            }

        return features
Ejemplo n.º 25
0
def list_tag_attribute_usage(tree: etree._ElementTree):
    """Walk the tree depth-first and report each element's tag usage.

    Yields:
        Tuples ``(parent_path, tag, attrs, value)`` where ``parent_path`` is
        the tuple of ancestor tags, ``attrs`` holds the attributes whose name
        does not start with ``{``, and ``value`` is the element text for
        childless elements with non-blank text (``None`` otherwise).
    """
    def walk(node, ancestors):
        child_elements = list(node.iterchildren(tag=etree.Element))
        if child_elements or not node.text or not node.text.strip():
            leaf_text = None
        else:
            leaf_text = node.text

        plain_attributes = {}
        for name, content in node.attrib.items():
            # Ignore namespaced attributes
            if name.startswith('{'):
                continue
            plain_attributes[name] = content

        yield ancestors, node.tag, plain_attributes, leaf_text

        own_path = ancestors + (node.tag, )
        for child in child_elements:
            yield from walk(child, own_path)

    yield from walk(tree.getroot(), ())
Ejemplo n.º 26
0
def get_chains(tree: etree._ElementTree) -> ty.Dict[str, ty.Set[str]]:
    """Read the coreference chains of the first schema link group.

    Args:
        tree: Document holding the coreference annotation.

    Returns:
        A dict mapping each link id to the set of ids derived from its
        targets with ``target_to_id``.

    Raises:
        ValueError: If a link has no target attribute.
    """
    schema_groups = tree.xpath(
        './tei:standOff/tei:annotation[@tei:type="coreference"]/tei:linkGrp[@tei:type="schema"]',
        namespaces=NSMAP,
    )
    first_group = schema_groups[0]
    if len(schema_groups) > 1:
        logger.warning(
            "There are more than one schema group in this document"
            f", only {xmlid(first_group)!r} will be taken into account"
        )

    chains = {}
    for link in first_group.iter(f"{TEI}link"):
        link_id = xmlid(link)
        raw_target = link.get(f"{TEI}target")
        if raw_target is None:
            raise ValueError(f"Schema {link_id!r} has no target attribute")
        chains[link_id] = {target_to_id(ref) for ref in raw_target.split()}
    return chains
Ejemplo n.º 27
0
def scan_fields(tree: _ElementTree) -> List[Dict[str, Set[str]]]:
    """Collect the values seen for every task, location and recurrence field.

    Args:
        tree: Parsed document whose ``body/ul/li`` nodes are the tasks.

    Returns:
        A three-item list ``[task_fields, recurrence_fields, location_fields]``
        (note the order), each a field-name to value-set dict passed through
        ``chop``.
    """
    task_fields: Dict[str, Set[str]] = {}
    location_fields: Dict[str, Set[str]] = {}
    recurrence_fields: Dict[str, Set[str]] = {}
    # A leading "/" makes findall() on a tree emit a FutureWarning and is
    # treated as "./" anyway, so the relative form is spelled out explicitly.
    for task_node in tree.findall('./body/ul/li'):
        task_dict = dict(node_to_dict(task_node))
        for key, val in task_dict.items():
            # The two nested entries are tallied separately below.
            if key not in ('Recurrence info', 'Location'):
                task_fields.setdefault(key, set()).add(cast(str, val))
        if task_dict.get('Location'):
            location_dict = cast(Dict[str, str], task_dict['Location'])
            for key, val in location_dict.items():
                location_fields.setdefault(key, set()).add(val)
        if task_dict.get('Recurrence info'):
            recurrence_dict = cast(Dict[str, str],
                                   task_dict['Recurrence info'])
            for key, val in recurrence_dict.items():
                recurrence_fields.setdefault(key, set()).add(val)
    task_fields = chop(task_fields)
    location_fields = chop(location_fields)
    recurrence_fields = chop(recurrence_fields)

    return [task_fields, recurrence_fields, location_fields]
Ejemplo n.º 28
0
def generate_criteria(
    crit_def: _ElementTree
) -> Tuple[CriteriaFunction, CriteriaFunction, CriteriaFunction]:
    """
    Builds the three criteria evaluation functions of a criteria definition.
    :param crit_def: The criteria definition tree; its root element is searched.
    :return: A tuple of functions: the first evaluates preconditions, the second success criteria and the
    third fail criteria. A criterion without a matching node gets a function returning an UnknownEvaluable.
    """
    from dbtypes.criteria import UnknownEvaluable
    from util.xml import xpath
    root: _Element = crit_def.getroot()

    def criterion_for(query):
        # Only the first matching node is used.
        nodes = xpath(root, query)
        if nodes:
            return generate_criterion(nodes[0])
        return lambda _: UnknownEvaluable()

    precondition = criterion_for("db:precondition")
    success = criterion_for("db:success")
    failure = criterion_for("db:failure")
    return precondition, success, failure
Ejemplo n.º 29
0
    def parse_feature_relations(self, xml_tree: etree._ElementTree) -> dict:
        """Collect feature relations, grouped by their source feature id.

        Every ``rel_*`` element listed in the query is inspected; the
        endpoints are read from its first ``mxCell`` descendant.

        Args:
            xml_tree: Tree to search for relation elements.

        Returns:
            A dict mapping a source id to a list of
            ``{"destination": <id>, "constraint_type": <relType>}`` entries.
        """
        relation_xpath = """
        //rel_concrete_root |
        //rel_abstract_root |
        //rel_concrete_abstract |
        //rel_concrete_concrete |
        //rel_abstract_concrete |
        //rel_abstract_abstract
        """

        source_xpath = ".//mxCell"

        relations = {}
        for relation in xml_tree.xpath(relation_xpath):
            relation_dict = dict(relation.items())
            cell = relation.find(source_xpath)
            if cell is None:
                # Without an mxCell there are no endpoints to read; skip the
                # relation instead of failing on ``None.items()``.
                continue
            relation_source_dict = dict(cell.items())
            if relation_dict and relation_source_dict:

                # Account for inverted direction in requires relations
                if relation_dict["relType"] == "requires":
                    source_id = int(relation_source_dict["source"])
                    destination_id = int(relation_source_dict["target"])
                else:
                    source_id = int(relation_source_dict["target"])
                    destination_id = int(relation_source_dict["source"])

                relations.setdefault(source_id, []).append(
                    {
                        "destination": destination_id,
                        "constraint_type": relation_dict["relType"],
                    }
                )

        return relations
Ejemplo n.º 30
0
def repair_tree(tree: etree._ElementTree, content_type: str,
                report: Report) -> bool:
    """Look for general document errors that would keep other checkers
    from working, report them and repair them where possible.

    Currently the only check is that the root of an XHTML document is the
    ``html`` element in the XHTML namespace.

    @return: True iff the tree was modified.
    """

    # Only XHTML documents are checked.
    if content_type != 'application/xhtml+xml':
        return False

    # Make sure XHTML root element has a namespace.
    if tree.getroot().tag == '{http://www.w3.org/1999/xhtml}html':
        return False

    msg = 'The root element does not use the XHTML namespace.'
    html = concat(
        msg, xml.br, 'expected: ',
        xml.code['<html xmlns="http://www.w3.org/1999/xhtml"'])
    report.error(msg, extra={'html': html})
    # lxml will auto-fix this for us when serializing, so there is
    # no need to actually modify the tree.
    return True