Beispiel #1
0
def _get_xml(url, payload, **kwargs):
    """
    Request a url and parse the response body into an xml node, retrying
    when an attempt fails.

    Parameters
    ----------
    url : address handed to utils.requests_pem_get
    payload : passed through as the ``params`` argument of the request
    kwargs : accepted for call compatibility; not used by this function

    Returns
    -------
        the result of xml_utils.string_to_node on the response content

    Raises
    ------
        Whatever the final attempt raises (request failure, the error from
        raise_for_status, or a parsing error).
    """
    # Five silent retries followed by one last attempt whose error is
    # allowed to propagate to the caller.
    max_attempts = 6
    for attempt in range(1, max_attempts + 1):
        try:
            out = utils.requests_pem_get(url, params=payload)
            out.raise_for_status()
            return xml_utils.string_to_node(out.content)
        except Exception:
            # Exception rather than a bare except, so that Ctrl-C and
            # SystemExit still interrupt the retry loop.
            if attempt == max_attempts:
                raise
Beispiel #2
0
def get_usgs_contact_info(ad_username, as_dictionary=True):
    """
    Request the contact record for an active directory username and return
    it as an FGDC contact section.

    Parameters
    ----------
    ad_username : str
                  The active directory username to return the
                  contact information for
    as_dictionary : bool
                    specify return format as nested dictionary or lxml element

    Returns
    -------
        FGDC Contact Section as dictionary (via xml_utils.node_to_dict) or
        as the parsed element itself.
        NOTE(review): earlier documentation stated that None is returned
        when ad_username is not found; there is no explicit check for that
        here, so it depends on what the helpers return - confirm.
    """

    result = requests_pem_get(USGS_AD_URL.format(ad_username))
    element = xml_utils.string_to_node(result.content)

    try:
        # A contact whose person name is 'GS ScienceBase' is re-tagged from
        # cntperp to cntorgp.
        if element.xpath('cntperp/cntper')[0].text == 'GS ScienceBase':
            element.xpath('cntperp')[0].tag = 'cntorgp'
    except (AttributeError, IndexError):
        # Best effort only: no parsed element, or no cntperp/cntper node,
        # means there is nothing to re-tag.
        pass

    if as_dictionary:
        return xml_utils.node_to_dict(element)
    return element
Beispiel #3
0
    def dropEvent(self, e):
        """
        Updates the form with the contents of an xml node or DOI dropped
        onto it.

        The drop is accepted as a copy action.  When the dropped data has
        urls, or its text is recognised by self.is_doi_str, the DOI is
        resolved with doi_utils.get_doi_citation and the resulting citation
        is loaded through self.from_xml; if that fails a warning dialog is
        shown.  Otherwise the dropped text is parsed with
        xml_utils.string_to_node and loaded through self.from_xml.

        Any other exception is reported on stdout instead of being raised.

        Parameters
        ----------
        e : qt event

        Returns
        -------
        None
        """
        try:
            e.setDropAction(Qt.CopyAction)
            e.accept()
            mime_data = e.mimeData()
            text = mime_data.text()
            text_is_doi = self.is_doi_str(text)

            if text_is_doi or mime_data.hasUrls():
                # Text that looks like a DOI wins over the url list.
                doi = text if text_is_doi else mime_data.urls()[0].url()
                try:
                    citeinfo = doi_utils.get_doi_citation(doi)
                    self.from_xml(citeinfo.to_xml())
                except Exception:
                    msg = "We ran into a problem creating a citeinfo element from that DOI({})".format(
                        doi)
                    # Newline keeps the two sentences from running together.
                    msg += "\nCheck the DOI and/or manually create the citation for it"
                    QMessageBox.warning(self, "Problem DOI", msg)
            else:
                element = xml_utils.string_to_node(text)
                self.from_xml(element)
        except Exception as err:
            # Bind the error to its own name so the event parameter is not
            # overwritten; the exception type is printed.
            print("problem drop", type(err))
Beispiel #4
0
    def dragEnterEvent(self, e):
        """
        Decide whether an item dragged over this widget may be dropped.

        Accepted when the drag carries a url whose first entry contains
        "doi" (case-insensitive), plain text that self.is_doi_str
        recognises, or plain text that parses to an xml node whose root tag
        is listed in self.acceptable_tags.  Drags with neither urls nor
        plain text are ignored.

        Parameters
        ----------
        e : qt event

        Returns
        -------
        None
        """
        mime = e.mimeData()

        # Url drags: only the first url is inspected.
        if mime.hasUrls():
            first_url = mime.urls()[0].url()
            if "doi" in first_url.lower():
                e.accept()
            return

        if not mime.hasFormat("text/plain"):
            e.ignore()
            return

        if self.is_doi_str(mime.text()):
            e.accept()
            return

        try:
            node = xml_utils.string_to_node(mime.text())
            tag_allowed = node is not None and node.tag in self.acceptable_tags
            if tag_allowed:
                e.accept()
        except AttributeError:
            e.ignore()
    def paste_mime(self):
        """
        Grab the current mime data off the clipboard and attempt to paste it
        into this widget.

        Clipboard content without a 'text/plain' format is skipped without
        any message.  Text that xml_utils.string_to_node turns into a node
        is loaded through self.from_xml; if it returns None a warning dialog
        is shown instead.

        Returns
        -------
        None
        """
        clipboard = QApplication.clipboard()
        mime_data = clipboard.mimeData()
        if not mime_data.hasFormat('text/plain'):
            return

        element = xml_utils.string_to_node(mime_data.text())
        if element is None:
            msg = "There was a problem pasting that content."
            # Wording refers to pasting; the earlier text spoke of a drop.
            msg += "\nThe content being pasted does not appear to be an xml element"
            QMessageBox.warning(self, "Paste Error", msg)
            return

        self.from_xml(element)
Beispiel #6
0
    def dropEvent(self, e):
        """
        Updates the form with the contents of an xml node dropped onto it.

        The drop is accepted as a copy action, its text is parsed with
        xml_utils.string_to_node and the result is loaded through
        self.from_xml.  Any exception is reported on stdout instead of
        being raised.

        Parameters
        ----------
        e : qt event

        Returns
        -------
        None
        """
        try:
            e.setDropAction(Qt.CopyAction)
            e.accept()
            mime_data = e.mimeData()
            element = xml_utils.string_to_node(mime_data.text())

            self.from_xml(element)
        except Exception as err:
            # Bind the error to its own name so the event parameter is not
            # overwritten; the exception type is printed.
            print('problem drop', type(err))
Beispiel #7
0
    def dragEnterEvent(self, e):
        """
        Accept a drag only when its plain text parses to an xml node whose
        root tag is 'datacred'.

        Drags without a "text/plain" format are explicitly ignored; plain
        text that does not yield a 'datacred' node is neither accepted nor
        ignored here.

        Parameters
        ----------
        e : qt event

        Returns
        -------
        None

        """
        mime = e.mimeData()
        if not mime.hasFormat("text/plain"):
            e.ignore()
            return

        node = xml_utils.string_to_node(mime.text())
        if node is None or node.tag != "datacred":
            return
        e.accept()
Beispiel #8
0
def _get_xml(url, payload, **kwargs):
    """
    Request a url and return its response body parsed into an xml node.

    Parameters
    ----------
    url : address handed to utils.requests_pem_get
    payload : passed through as the ``params`` argument of the request
    kwargs : accepted but not used by this function

    Returns
    -------
        the result of xml_utils.string_to_node on the response content
    """
    response = utils.requests_pem_get(url, params=payload)
    # Let an error status surface before any parsing is attempted.
    response.raise_for_status()
    return xml_utils.string_to_node(response.content)
Beispiel #9
0
def validate_xml(xml, xsl_fname="fgdc", as_dataframe=False):
    """
    Validate a metadata record against a schema and against additional
    lineage cross-checks (source citation abbreviations).

    Parameters
    ----------
    xml : lxml document
                or
          filename
                or
          string containing xml representation

    xsl_fname : str (optional)
                can be one of:
                'fgdc' - uses the standard fgdc schema
                        ../resources/FGDC/fgdc-std-001-1998-annotated.xsd
                'bdp' = use the Biological Data profile schema,
                        ../resources/FGDC/BDPfgdc-std-001-1998-annotated.xsd
                full file path to another local schema.

                if not specified defaults to 'fgdc'
    as_dataframe : bool
                used to specify return format (list of tuples or dataframe)

    Returns
    -------
        list of tuples
        (xpath, error message, line number)
        or
        pandas dataframe

        An empty list is returned when no problems are found, regardless
        of as_dataframe.  Errors from the lineage cross-checks always carry
        line number 1.
    """

    schema_key = xsl_fname.lower()
    if schema_key == "fgdc":
        xsl_fname = utils.get_resource_path(FGDC_XSD_NAME)
    elif schema_key == "bdp":
        xsl_fname = utils.get_resource_path(BDP_XSD_NAME)
    # Any other value is used unchanged as the schema location.

    xmlschema = xml_utils.load_schema(xsl_fname)
    xml_doc = xml_utils.xml_document_loader(xml)
    xml_str = xml_utils.node_to_string(xml_doc)

    # Round-trip through a string so the node is freshly parsed.
    tree_node = xml_utils.string_to_node(xml_str.encode("utf-8"))
    # NOTE(review): the result of this call is discarded; kept as-is because
    # it is unclear whether anything relies on it - confirm and remove.
    lxml._etree._ElementTree(tree_node)

    errors = []
    # Every abbreviation defined by a source citation or produced by a
    # process step; srcused entries are checked against this list below.
    srcciteas = []

    src_xpath = "dataqual/lineage/srcinfo/srccitea"
    src_nodes = tree_node.xpath(src_xpath)
    for i, src in enumerate(src_nodes):
        srcciteas.append(src.text)
        if src.text is None:
            if len(src_nodes) == 1:
                errors.append((
                    "metadata/" + src_xpath,
                    "source citation abbreviation cannot be empty",
                    1,
                ))
            else:
                xpath = "metadata/dataqual/lineage/srcinfo[{}]/srccitea"
                errors.append((
                    xpath.format(i + 1),
                    "source citation abbreviation cannot be empty",
                    1,
                ))

    procstep_xpath = "dataqual/lineage/procstep"
    procstep_nodes = tree_node.xpath(procstep_xpath)
    for proc_i, proc in enumerate(procstep_nodes):
        srcprod_nodes = proc.xpath("srcprod")
        for srcprod_i, srcprod in enumerate(srcprod_nodes):
            srcciteas.append(srcprod.text)
            if srcprod.text is None:
                # Positional predicates are only added when there is more
                # than one sibling at that level.
                error_xpath = procstep_xpath
                if len(procstep_nodes) > 1:
                    error_xpath += "[{}]".format(proc_i + 1)
                error_xpath += "/srcprod"
                if len(srcprod_nodes) > 1:
                    # Index of the srcprod within its own process step (the
                    # process step index was used here before).
                    error_xpath += "[{}]".format(srcprod_i + 1)
                errors.append((
                    "metadata/" + error_xpath,
                    "source produced abbreviation cannot be empty",
                    1,
                ))

    srcused_xpath = "dataqual/lineage/procstep/srcused"
    srcused_nodes = tree_node.xpath(srcused_xpath)
    for i, src in enumerate(srcused_nodes):
        if src.text not in srcciteas:
            if len(srcused_nodes) == 1:
                errors.append((
                    "metadata/" + srcused_xpath,
                    "Source Used Citation Abbreviation {} "
                    "not found in Source inputs "
                    "used".format(src.text),
                    1,
                ))
            else:
                # NOTE(review): i counts srcused nodes across all process
                # steps, yet it is written as the procstep index - confirm
                # whether the parent procstep position was intended.
                xpath = "metadata/dataqual/lineage/procstep[{}]/srcused"
                errors.append((
                    xpath.format(i + 1),
                    "Source Used Citation Abbreviation {} "
                    "not found in Source inputs "
                    "used".format(src.text),
                    1,
                ))

    if xmlschema.validate(tree_node) and not errors:
        return []

    # Map source line numbers to element paths so schema errors, which only
    # report a line, can be given an xpath.
    root_tree = tree_node.getroottree()
    line_lookup = {node.sourceline: root_tree.getpath(node)
                   for node in tree_node.xpath(".//*")}
    line_lookup[tree_node.sourceline] = root_tree.getpath(tree_node)

    fgdc_lookup = get_fgdc_lookup()

    for error in xmlschema.error_log:
        error_msg = clean_error_message(error.message, fgdc_lookup)
        try:
            # [1:] drops the first character of the path (presumably the
            # leading "/").
            errors.append((line_lookup[error.line][1:], error_msg, error.line))
        except KeyError:
            errors.append(("Unknown", error_msg, error.line))

    # Remove duplicate entries while keeping first-seen order.
    errors = list(OrderedDict.fromkeys(errors))

    if as_dataframe:
        cols = ["xpath", "message", "line number"]
        return pd.DataFrame.from_records(errors, columns=cols)
    return errors