Beispiel #1
0
    def check_for_changes(self):
        """
        Offer to save when the in-memory record differs from the file on disk.

        The serialized XML of ``self.metadata_root`` is compared with the
        serialized contents of ``self.cur_fname``.  If they differ the user
        is asked whether to save; answering Yes writes the current record
        back to ``self.cur_fname``.

        Returns
        -------
        str or None :
        'Cancel' if the user pressed Cancel, otherwise None (no file on
        disk to compare against, no differences, the user answered Yes or
        No, or the comparison itself failed).
        """
        try:
            # Nothing to compare against when no file is open or the file
            # has disappeared from disk.
            if not (self.cur_fname and os.path.exists(self.cur_fname)):
                return None

            cur_xml = xml_utils.node_to_string(self.metadata_root.to_xml())
            disk_xml = xml_utils.node_to_string(
                xml_utils.fname_to_node(self.cur_fname)
            )
            if cur_xml == disk_xml:
                return None

            self.last_updated = time.time()
            confirm = QMessageBox.question(
                self,
                "Save Changes",
                "Do you want to save your changes?",
                QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel,
            )
            if confirm == QMessageBox.Cancel:
                return "Cancel"
            if confirm == QMessageBox.Yes:
                xml_utils.save_to_file(self.metadata_root.to_xml(), self.cur_fname)
        except Exception:
            # Deliberately best effort: a failure while comparing or saving
            # must not block the caller.  Only ``Exception`` is caught so
            # that KeyboardInterrupt/SystemExit still propagate.
            pass
        return None
Beispiel #2
0
    def exit(self):
        """
        Before exiting check if the current contents match what is on the
        file system.  If they do not match, ask the user whether to save
        the changes, discard them, or cancel the exit.

        Returns
        -------
        str :
        'Close' or 'Cancel' depending on user choice.
        """
        if self.cur_fname:
            cur_xml = xml_utils.node_to_string(self.metadata_root._to_xml())
            disk_xml = xml_utils.node_to_string(xml_utils.fname_to_node(self.cur_fname))

            if cur_xml != disk_xml:
                self.last_updated = time.time()
                confirm = QMessageBox.question(
                    self,
                    "File Changed",
                    "Would you like to save before exiting?",
                    QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel,
                )
                if confirm == QMessageBox.Cancel:
                    # Abort the exit; the window stays open.
                    return 'Cancel'
                if confirm == QMessageBox.Yes:
                    self.save_file()
                # NOTE(review): the file name is cleared after Yes/No,
                # presumably so later close handling does not prompt
                # again -- confirm against the close event handler.
                self.cur_fname = ''

        self.close()
        return 'Close'
Beispiel #3
0
def validate_xml(xml, xsl_fname='fgdc', as_dataframe=False):
    """
    Validate a metadata record against an XSD schema and list the errors.

    Parameters
    ----------
    xml : lxml document
                or
          filename
                or
          string containing xml representation

    xsl_fname : str (optional)
                can be one of:
                'fgdc' - uses the standard fgdc schema
                        ../resources/FGDC/fgdc-std-001-1998-annotated.xsd
                'bdp' = use the Biological Data profile schema,
                        ../resources/FGDC/BDPfgdc-std-001-1998-annotated.xsd
                full file path to another local schema.

                if not specified defaults to 'fgdc'
    as_dataframe : bool (optional)
                if True the errors are returned as a pandas dataframe
                instead of a list of tuples

    Returns
    -------
        list of tuples
        (xpath, error message, line number)
        or
        pandas dataframe with the columns
        'xpath', 'message', 'line number'

        An empty list is returned when the record validates, regardless
        of as_dataframe.
    """
    # Resolve the schema shortcuts; any other value is used as a path as is.
    schema_key = xsl_fname.lower()
    if schema_key == 'fgdc':
        xsl_fname = utils.get_resource_path('fgdc/fgdc-std-001-1998-annotated.xsd')
    elif schema_key == 'bdp':
        xsl_fname = utils.get_resource_path('fgdc/BDPfgdc-std-001-1998-annotated.xsd')

    xmlschema = etree.XMLSchema(etree.parse(xsl_fname))

    # Serialize and re-parse the record so the validated tree is built
    # from the same text the reported line numbers refer to.
    xml_str = xml_utils.node_to_string(xml_utils.xml_document_loader(xml))
    tree = etree.ElementTree(etree.fromstring(xml_str))

    if xmlschema.validate(tree):
        return []

    # Map each source line to the path of an element starting on it, so a
    # schema error (reported by line) can be tied back to an xpath.
    line_lookup = {e.sourceline: tree.getpath(e) for e in tree.xpath('.//*')}
    root = tree.getroot()
    line_lookup[root.sourceline] = tree.getpath(root)

    errors = []
    for error in xmlschema.error_log:
        path = line_lookup.get(error.line)
        # Drop the first character of the path; lines with no known
        # element are reported as 'Unknown'.
        xpath = path[1:] if path is not None else 'Unknown'
        errors.append((xpath, clean_error_message(error.message), error.line))

    if as_dataframe:
        cols = ['xpath', 'message', 'line number']
        return pd.DataFrame.from_records(errors, columns=cols)
    return errors
Beispiel #4
0
def validate_xml(xml, xsl_fname="fgdc", as_dataframe=False):
    """
    Validate a metadata record against an XSD schema and check that the
    lineage source abbreviations are filled in and consistent.

    Besides the schema validation, three extra checks are made:
    every dataqual/lineage/srcinfo/srccitea must have text, every
    dataqual/lineage/procstep/srcprod must have text, and the text of every
    dataqual/lineage/procstep/srcused must match one of those srccitea or
    srcprod values.

    Parameters
    ----------
    xml : lxml document
                or
          filename
                or
          string containing xml representation

    xsl_fname : str (optional)
                can be one of:
                'fgdc' - uses the standard fgdc schema
                        ../resources/FGDC/fgdc-std-001-1998-annotated.xsd
                'bdp' = use the Biological Data profile schema,
                        ../resources/FGDC/BDPfgdc-std-001-1998-annotated.xsd
                full file path to another local schema.

                if not specified defaults to 'fgdc'
    as_dataframe : bool
                used to specify return format (list of tuples or dataframe)

    Returns
    -------
        list of tuples
        (xpath, error message, line number)
        or
        pandas dataframe

        The lineage checks always report line number 1.  An empty list is
        returned when no errors are found, regardless of as_dataframe.
    """

    # Resolve the schema shortcuts; any other value is used as a path as is.
    schema_key = xsl_fname.lower()
    if schema_key == "fgdc":
        xsl_fname = utils.get_resource_path(FGDC_XSD_NAME)
    elif schema_key == "bdp":
        xsl_fname = utils.get_resource_path(BDP_XSD_NAME)

    xmlschema = xml_utils.load_schema(xsl_fname)
    xml_doc = xml_utils.xml_document_loader(xml)
    xml_str = xml_utils.node_to_string(xml_doc)

    tree_node = xml_utils.string_to_node(xml_str.encode("utf-8"))
    # NOTE(review): the result of this call is discarded and it reaches
    # into a private lxml name; kept unchanged because its effect cannot
    # be confirmed from here -- verify whether it can be removed.
    lxml._etree._ElementTree(tree_node)

    errors = []
    # Every abbreviation a srcused element is allowed to refer to.
    srcciteas = []

    src_xpath = "dataqual/lineage/srcinfo/srccitea"
    src_nodes = tree_node.xpath(src_xpath)
    for i, src in enumerate(src_nodes):
        srcciteas.append(src.text)
        if src.text is None:
            if len(src_nodes) == 1:
                errors.append((
                    "metadata/" + src_xpath,
                    "source citation abbreviation cannot be empty",
                    1,
                ))
            else:
                xpath = "metadata/dataqual/lineage/srcinfo[{}]/srccitea"
                errors.append((
                    xpath.format(i + 1),
                    "source citation abbreviation cannot be empty",
                    1,
                ))

    procstep_xpath = "dataqual/lineage/procstep"
    procstep_nodes = tree_node.xpath(procstep_xpath)

    def procstep_child_path(proc_i, tag, child_i, sibling_count):
        # Path of one procstep child; positional predicates are only added
        # where more than one sibling exists.
        path = procstep_xpath
        if len(procstep_nodes) > 1:
            path += "[{}]".format(proc_i + 1)
        path += "/" + tag
        if sibling_count > 1:
            path += "[{}]".format(child_i + 1)
        return "metadata/" + path

    for proc_i, proc in enumerate(procstep_nodes):
        srcprod_nodes = proc.xpath("srcprod")
        for srcprod_i, srcprod in enumerate(srcprod_nodes):
            srcciteas.append(srcprod.text)
            if srcprod.text is None:
                errors.append((
                    procstep_child_path(
                        proc_i, "srcprod", srcprod_i, len(srcprod_nodes)
                    ),
                    "source produced abbreviation cannot be empty",
                    1,
                ))

    # Checked in a second pass so that abbreviations produced by any
    # process step (including later ones) are already collected.
    for proc_i, proc in enumerate(procstep_nodes):
        srcused_nodes = proc.xpath("srcused")
        for srcused_i, srcused in enumerate(srcused_nodes):
            if srcused.text in srcciteas:
                continue
            errors.append((
                procstep_child_path(
                    proc_i, "srcused", srcused_i, len(srcused_nodes)
                ),
                "Source Used Citation Abbreviation {} "
                "not found in Source inputs "
                "used".format(srcused.text),
                1,
            ))

    if xmlschema.validate(tree_node) and not errors:
        return []

    # Map each source line to the path of an element starting on it, so a
    # schema error (reported by line) can be tied back to an xpath.
    root_tree = tree_node.getroottree()
    line_lookup = {
        e.sourceline: root_tree.getpath(e) for e in tree_node.xpath(".//*")
    }
    line_lookup[tree_node.sourceline] = root_tree.getpath(tree_node)

    fgdc_lookup = get_fgdc_lookup()

    for error in xmlschema.error_log:
        error_msg = clean_error_message(error.message, fgdc_lookup)
        path = line_lookup.get(error.line)
        # Drop the first character of the path; lines with no known
        # element are reported as "Unknown".
        xpath = path[1:] if path is not None else "Unknown"
        errors.append((xpath, error_msg, error.line))

    # Remove duplicate entries while keeping first-seen order.
    errors = list(OrderedDict.fromkeys(errors))

    if as_dataframe:
        cols = ["xpath", "message", "line number"]
        return pd.DataFrame.from_records(errors, columns=cols)
    return errors