def populate_taxonomic_coverage_form(form: TaxonomicCoverageForm, node: Node):
    """Fill a taxonomic coverage form from a taxonomic coverage node.

    The classification chain under ``node`` is flattened by
    ``populate_taxonomic_coverage_form_aux`` into a list of tuples; the
    reversed list is stored on ``form.hierarchy`` and the last collected
    taxon seeds the value, rank and authority fields.

    Args:
        form: Form whose fields are populated in place.
        node: Node searched for the general coverage text and the first
            taxonomic classification child.

    Returns:
        True if any collected taxon has a truthy link entry (tuple index 4),
        otherwise False.
    """
    general_node = node.find_child(names.GENERALTAXONOMICCOVERAGE)
    if general_node:
        form.general_taxonomic_coverage.data = general_node.content

    hierarchy = []
    classification_node = node.find_child(names.TAXONOMICCLASSIFICATION)
    populate_taxonomic_coverage_form_aux(hierarchy, classification_node)
    form.hierarchy.data = hierarchy[::-1]

    # The hierarchy is empty when there is no classification node or none of
    # the classifications had both a rank name and a rank value; indexing it
    # unconditionally would raise IndexError.
    if hierarchy:
        first_taxon = hierarchy[-1]
        form.taxon_value.data = first_taxon[1]
        taxon_rank = first_taxon[0].capitalize()
        # Only preselect a rank the form actually offers as a choice.
        if (taxon_rank, taxon_rank) in form.taxon_rank.choices:
            form.taxon_rank.data = taxon_rank
        if first_taxon[5]:
            form.taxonomic_authority.data = first_taxon[5]

    have_links = any(taxon[4] for taxon in hierarchy)
    form.md5.data = form_md5(form)
    return have_links
def populate_method_step_form(form: MethodStepForm, ms_node: Node):
    """Copy a method step's description and instrumentation into the form.

    The description text is taken from the description node's own content
    when present; otherwise from a section child (with paragraph tags
    removed), and failing that from a para child.

    Args:
        form: Form whose fields are populated in place.
        ms_node: Method step node; a falsy value leaves both fields empty.
    """
    step_text = ''
    instrument_text = ''

    if ms_node:
        desc_node = ms_node.find_child(names.DESCRIPTION)
        if desc_node and desc_node.content:
            step_text = desc_node.content
        elif desc_node:
            section = desc_node.find_child(names.SECTION)
            if section:
                step_text = remove_paragraph_tags(section.content)
            else:
                # The para child is only consulted when there is no section.
                para = desc_node.find_child(names.PARA)
                if para:
                    step_text = para.content

        instr_node = ms_node.find_child(names.INSTRUMENTATION)
        if instr_node:
            instrument_text = instr_node.content

    form.description.data = remove_paragraph_tags(step_text)
    form.instrumentation.data = instrument_text
    form.md5.data = form_md5(form)
def populate_project_form(form: ProjectForm, project_node: Node):
    """Copy a project's title and abstract into the form.

    The abstract comes from the abstract node's own content; when that is
    empty, a para child is tried first and a section child second.

    Args:
        form: Form whose fields are populated in place.
        project_node: Project node; a falsy value leaves both fields empty.
    """
    project_title = ''
    abstract_text = ''

    if project_node:
        title_node = project_node.find_child(names.TITLE)
        if title_node:
            project_title = title_node.content

        abstract_node = project_node.find_child(names.ABSTRACT)
        if abstract_node:
            abstract_text = abstract_node.content
            if not abstract_text:
                para = abstract_node.find_child(names.PARA)
                if para:
                    abstract_text = para.content
                else:
                    # The section child is only consulted when no para exists.
                    section = abstract_node.find_child(names.SECTION)
                    if section:
                        abstract_text = section.content

    form.title.data = project_title
    form.abstract.data = remove_paragraph_tags(abstract_text)
    form.md5.data = form_md5(form)
def populate_method_step_form(form: MethodStepForm, ms_node: Node):
    """Copy a method step's description, instrumentation and data sources into the form.

    The displayed description may embed a data-sources section delimited by
    ``data_sources_marker_begin`` and ``data_sources_marker_end``. When both
    markers are present, the text between them goes to ``form.data_sources``
    and only the text before the begin marker stays in the description.

    Args:
        form: Form whose fields are populated in place.
        ms_node: Method step node; a falsy value leaves all fields empty.
    """
    description = ''
    instrumentation = ''
    data_sources = ''

    if ms_node:
        description_node = ms_node.find_child(names.DESCRIPTION)
        if description_node:
            description = display_texttype_node(description_node)
            if data_sources_marker_begin in description and data_sources_marker_end in description:
                begin = description.find(data_sources_marker_begin)
                end = description.find(data_sources_marker_end)
                # One separator character is skipped after the begin marker
                # and before the end marker, as in the original slicing.
                # Bounds are clamped at zero: a marker at index 0 would
                # otherwise give a -1 bound that slices from the end.
                start = begin + len(data_sources_marker_begin) + 1
                data_sources = description[start:max(end - 1, 0)]
                description = description[0:max(begin - 1, 0)]

        instrumentation_node = ms_node.find_child(names.INSTRUMENTATION)
        if instrumentation_node:
            instrumentation = instrumentation_node.content

    form.description.data = description
    form.instrumentation.data = instrumentation
    form.data_sources.data = data_sources
    form.md5.data = form_md5(form)
def test_add_child(self):
    """add_child appends by default and inserts at the given index."""
    appended = Node(names.ACCESS)
    self.node.add_child(appended)
    # Keep one reference to the children list; it is read again after the
    # second insertion without being fetched anew.
    kids = self.node.children
    self.assertIs(appended, kids[0])

    inserted = Node(names.DATASET)
    self.node.add_child(inserted, 0)
    self.assertIs(inserted, kids[0])
def test_add_child(node):
    """add_child appends by default and inserts at the given index."""
    appended = Node(names.ACCESS)
    node.add_child(appended)
    # Keep one reference to the children list; it is read again after the
    # second insertion without being fetched anew.
    kids = node.children
    assert appended is kids[0]

    inserted = Node(names.DATASET)
    node.add_child(inserted, 0)
    assert inserted is kids[0]
def test_taxonid():
    """A taxonId node validates only once it has a provider attribute."""
    taxon_id_node = Node(names.TAXONID, parent=None)
    taxon_id_node.content = "42"

    # Without the provider attribute, validation must fail.
    with pytest.raises(MetapypeRuleError):
        validate.node(taxon_id_node)

    # With the provider attribute, validation must pass without raising.
    taxon_id_node.add_attribute("provider", "https://www.itis.gov")
    validate.node(taxon_id_node)
def test_find_single_node_by_path(node):
    """find_single_node_by_path resolves nested paths and yields None otherwise."""
    # One level deep.
    access_node = Node(names.ACCESS)
    node.add_child(access_node)
    found = node.find_single_node_by_path([names.ACCESS])
    assert access_node is found

    # Two levels deep.
    allow_node = Node(names.ALLOW)
    access_node.add_child(allow_node)
    found = node.find_single_node_by_path([names.ACCESS, names.ALLOW])
    assert found is allow_node

    # Three levels deep.
    permission_node = Node(names.PERMISSION)
    allow_node.add_child(permission_node)
    full_path = [names.ACCESS, names.ALLOW, names.PERMISSION]
    found = node.find_single_node_by_path(full_path)
    assert found is permission_node

    # An unknown last segment, an empty path and a None path all give None.
    found = node.find_single_node_by_path([names.ACCESS, names.ALLOW, "nonesuch"])
    assert found is None
    found = node.find_single_node_by_path([])
    assert found is None
    found = node.find_single_node_by_path(None)
    assert found is None
def clear_taxonomic_coverage(package_name):
    """Delete every taxonomic coverage node from a package's dataset coverage.

    When the user selects Clear Taxonomic Coverage to get rid of coverage
    imported from XML, the coverage is deleted and the imported-from-XML flag
    is cleared so the package's taxonomy can be handled normally even though
    the package was imported from XML.

    Args:
        package_name: Name passed to ``load_eml`` as the filename and to
            ``clear_taxonomy_imported_from_xml``.
    """
    eml_node = load_eml(filename=package_name)
    coverage_node = eml_node.find_single_node_by_path([names.DATASET, names.COVERAGE])
    if not coverage_node:
        return

    for stale_node in coverage_node.find_all_children(names.TAXONOMICCOVERAGE):
        coverage_node.remove_child(stale_node)
        Node.delete_node_instance(stale_node.id)
    clear_taxonomy_imported_from_xml(eml_node, package_name)
def maintenance(filename=None):
    """Handle GET and POST requests for the maintenance page.

    POST with the cancel button redirects back to the maintenance page.
    A validated POST saves the maintenance description and update frequency
    when the form is dirty, then redirects to the page chosen from the
    submitted button values. Any other request renders the page, populated
    from the dataset's existing maintenance node when there is one.

    Args:
        filename: Name of the document to load and save.

    Returns:
        A redirect response or the rendered maintenance page.
    """
    form = MaintenanceForm(filename=filename)
    eml_node = load_eml(filename=filename)

    # Bind dataset_node up front: it is read further down even when no
    # document could be loaded, which used to raise UnboundLocalError.
    dataset_node = None
    if eml_node:
        dataset_node = eml_node.find_child(names.DATASET)
        if not dataset_node:
            dataset_node = Node(names.DATASET, parent=eml_node)
            add_child(eml_node, dataset_node)

    # Process POST
    if request.method == 'POST' and BTN_CANCEL in request.form:
        return redirect(url_for(PAGE_MAINTENANCE, filename=filename))

    if request.method == 'POST' and form.validate_on_submit():
        # Nothing can be saved without a dataset node to attach it to.
        if is_dirty_form(form) and dataset_node is not None:
            maintenance_description = form.description.data
            valid, msg = is_valid_xml_fragment(maintenance_description, names.MAINTENANCE)
            if not valid:
                flash(invalid_xml_error_message(msg, False, names.DESCRIPTION), 'error')
                return render_get_maintenance_page(eml_node, form, filename)

            update_frequency = form.update_frequency.data
            create_maintenance(dataset_node, maintenance_description, update_frequency)
            save_both_formats(filename=filename, eml_node=eml_node)

        form_dict = request.form.to_dict(flat=False)
        new_page = PAGE_MAINTENANCE
        if form_dict:
            # Every submitted key is visited in order, so the last key
            # decides the final target page.
            for key in form_dict:
                val = form_dict[key][0]  # value is the first list element
                if val == BTN_SAVE_AND_CONTINUE:
                    new_page = PAGE_PUBLISHER
                else:
                    new_page = handle_hidden_buttons(new_page, PAGE_MAINTENANCE)

        return redirect(url_for(new_page, filename=filename))

    # Process GET
    if dataset_node:
        maintenance_node = dataset_node.find_child(names.MAINTENANCE)
        if maintenance_node:
            populate_maintenance_form(form, maintenance_node)

    return render_get_maintenance_page(eml_node, form, filename)
def display_simple_texttype_node(text_node: Node = None) -> str:
    """Return displayable text for a simple text-type node.

    Currently, this handles simple cases with paras only (paras may be
    contained in sections).

    Args:
        text_node: Node to render; a falsy value yields an empty string.

    Returns:
        The node's own content, unchanged, when it has any. Otherwise the
        content of each non-empty para descendant, one per line, with
        ``&lt;`` and ``&gt;`` entities turned back into ``<`` and ``>``.
    """
    if not text_node:
        return ''
    if text_node.content:
        return text_node.content

    para_nodes = []
    text_node.find_all_descendants(names.PARA, para_nodes)
    text = ''.join(f'{para.content}\n' for para in para_nodes if para.content)

    # Undo angle-bracket escaping for display. Replacing '<' with '<' (and
    # '>' with '>') changed nothing; the entity forms are what need decoding.
    return text.replace('&lt;', '<').replace('&gt;', '>')
def add_geo_coverage_node(eml_node, description, north, south, east, west):
    """Append a new geographic coverage node under the dataset's coverage node.

    Missing dataset and coverage nodes are created and attached on the way.

    Args:
        eml_node: Root node searched for the dataset child.
        description: Passed through to ``create_geographic_coverage``.
        north: North bound, passed through.
        south: South bound, passed through.
        east: East bound, passed through.
        west: West bound, passed through.
    """
    dataset_node = eml_node.find_child(names.DATASET)
    if not dataset_node:
        # Attach the new dataset node to the tree; a detached node would take
        # the new coverage with it and the document would never see it.
        dataset_node = Node(names.DATASET, parent=eml_node)
        add_child(eml_node, dataset_node)

    coverage_node = dataset_node.find_child(names.COVERAGE)
    if not coverage_node:
        coverage_node = Node(names.COVERAGE, parent=dataset_node)
        add_child(dataset_node, coverage_node)

    gc_node = Node(names.GEOGRAPHICCOVERAGE, parent=coverage_node)
    add_child(coverage_node, gc_node)

    # Argument order here is west, east, north, south.
    create_geographic_coverage(gc_node, description, west, east, north, south)
def expand(node: Node):
    """Replace each references node under ``node`` with copies of the referenced children.

    Every references descendant is looked up by its content in the id
    registry built by ``_register_ids``. The references node is removed from
    its parent and deleted, and a copy of each child of the referenced node
    is added to that parent.

    Args:
        node: Root of the tree to expand in place.

    Raises:
        ValueError: If a references node's content is not a registered id.
            References handled before the failing one stay expanded.
    """
    reference_nodes = []
    node.find_all_descendants(names.REFERENCES, reference_nodes)
    id_registry = _register_ids(node)

    for reference in reference_nodes:
        if reference.content not in id_registry:
            raise ValueError(f"ID not found for REFERENCE '{reference}'")

        source = id_registry[reference.content]
        target = reference.parent
        target.remove_child(reference)
        Node.delete_node_instance(reference.id)

        for original_child in source.children:
            target.add_child(original_child.copy())
def test_remove_child(self):
    """remove_child takes a previously added child out of the children list."""
    access_node = Node(names.ACCESS)
    self.node.add_child(access_node)

    first_child = self.node.children[0]
    self.assertIs(access_node, first_child)

    self.node.remove_child(first_child)
    self.assertNotIn(access_node, self.node.children)
def test_remove_child(node):
    """remove_child takes a previously added child out of the children list."""
    access_node = Node(names.ACCESS)
    node.add_child(access_node)

    first_child = node.children[0]
    assert access_node is first_child

    node.remove_child(first_child)
    assert access_node not in node.children
def get_attribute_type(attrib_node: Node):
    """Classify an attribute node by the contents of its measurement scale.

    Args:
        attrib_node: Attribute node whose measurement scale child is inspected.

    Returns:
        A ``metapype_client.VariableType`` member (CATEGORICAL, TEXT,
        NUMERICAL or DATETIME), or None when there is no measurement scale
        or nothing under it matches.
    """
    mscale_node = attrib_node.find_child(names.MEASUREMENTSCALE)
    if not mscale_node:
        # No measurement scale means there is nothing to classify; the
        # lookups below would otherwise fail on None.
        return None

    # Formerly, Categorical variables were nominal. But now that we're
    # importing externally created XML files, they may be ordinal.
    nominal_or_ordinal_node = mscale_node.find_child(names.NOMINAL)
    if not nominal_or_ordinal_node:
        nominal_or_ordinal_node = mscale_node.find_child(names.ORDINAL)
    if nominal_or_ordinal_node:
        enumerated_domain_node = nominal_or_ordinal_node.find_single_node_by_path(
            [names.NONNUMERICDOMAIN, names.ENUMERATEDDOMAIN])
        if enumerated_domain_node:
            return metapype_client.VariableType.CATEGORICAL
        text_domain_node = nominal_or_ordinal_node.find_single_node_by_path(
            [names.NONNUMERICDOMAIN, names.TEXTDOMAIN])
        if text_domain_node:
            return metapype_client.VariableType.TEXT

    # Formerly, Numerical variables were ratio. But now that we're importing
    # externally created XML files, they may be interval.
    ratio_or_interval_node = mscale_node.find_child(names.RATIO)
    if not ratio_or_interval_node:
        ratio_or_interval_node = mscale_node.find_child(names.INTERVAL)
    if ratio_or_interval_node:
        return metapype_client.VariableType.NUMERICAL

    if mscale_node.find_child(names.DATETIME):
        return metapype_client.VariableType.DATETIME

    return None
def populate_access_rule_form(form: AccessForm, allow_node: Node):
    """Copy an allow rule's principal and permission into the form.

    Args:
        form: Form whose fields are populated in place.
        allow_node: Allow node; a falsy value leaves both fields empty.
    """
    principal_text = ''
    permission_text = ''

    if allow_node:
        principal_node = allow_node.find_child(names.PRINCIPAL)
        if principal_node:
            principal_text = principal_node.content

        permission_node = allow_node.find_child(names.PERMISSION)
        if permission_node:
            permission_text = permission_node.content

    form.userid.data = principal_text
    form.permission.data = permission_text
    form.md5.data = form_md5(form)
def populate_maintenance_form(form: MaintenanceForm, maintenance_node: Node):
    """Copy a maintenance node's description and update frequency into the form.

    Args:
        form: Form whose fields are populated in place.
        maintenance_node: Maintenance node; a falsy value leaves both fields
            empty.
    """
    description_text = ''
    frequency_text = ''

    if maintenance_node:
        description_node = maintenance_node.find_child(names.DESCRIPTION)
        if description_node:
            description_text = display_texttype_node(description_node)

        frequency_node = maintenance_node.find_child(names.MAINTENANCEUPDATEFREQUENCY)
        if frequency_node:
            frequency_text = frequency_node.content

    form.description.data = description_text
    form.update_frequency.data = frequency_text
    form.md5.data = form_md5(form)
def check_data_table(eml_node, filename, data_table_node: Node):
    """Record evaluation entries for a data table and for each of its attributes.

    Args:
        eml_node: Passed through to ``check_attribute``.
        filename: Document name used to build the data table page link.
        data_table_node: Data table node to validate and evaluate.
    """
    link = url_for(PAGE_DATA_TABLE, filename=filename, node_id=data_table_node.id)
    validation_errs = validate_via_metapype(data_table_node)
    check_data_table_md5_checksum(data_table_node, link)

    # (parent element, child element, evaluation code) checked in this order.
    min_unmet_checks = (
        (names.DATATABLE, names.ENTITYNAME, 'data_table_01'),
        (names.DATATABLE, names.ENTITYDESCRIPTION, 'data_table_02'),
        (names.PHYSICAL, names.OBJECTNAME, 'data_table_03'),
        (names.DATATABLE, names.ATTRIBUTELIST, 'data_table_04'),
    )
    for parent_name, child_name, code in min_unmet_checks:
        if find_min_unmet(validation_errs, parent_name, child_name):
            add_to_evaluation(code, link)

    evaluation_warnings = evaluate_via_metapype(data_table_node)
    description_missing = find_err_code(
        evaluation_warnings,
        EvaluationWarning.DATATABLE_DESCRIPTION_MISSING,
        names.DATATABLE)
    if description_missing:
        add_to_evaluation('data_table_02', link)

    attribute_list_node = data_table_node.find_child(names.ATTRIBUTELIST)
    if attribute_list_node:
        for attribute_node in attribute_list_node.find_all_children(names.ATTRIBUTE):
            check_attribute(eml_node, filename, data_table_node, attribute_node)
def populate_taxonomic_coverage_form_aux(hierarchy, node: Node = None):
    """Append one tuple per taxonomic classification level to ``hierarchy``.

    Starting at ``node`` and following nested classification children, each
    level that has both a rank name and a rank value contributes
    ``(rank_name, rank_value, common_name, taxon_id, link, provider)``.
    ``link`` and ``provider`` are set only when the level has a taxon id
    whose provider URI is one of the three recognised below.

    Args:
        hierarchy: List extended in place.
        node: Classification node to start from; a falsy value does nothing.
    """
    if not node:
        return

    rank_name_node = node.find_child(names.TAXONRANKNAME)
    rank_value_node = node.find_child(names.TAXONRANKVALUE)
    common_name_node = node.find_child(names.COMMONNAME)
    id_node = node.find_child(names.TAXONID)

    rank_name = rank_name_node.content if rank_name_node else None
    rank_value = rank_value_node.content if rank_value_node else None
    common_name = common_name_node.content if common_name_node else ''
    if id_node:
        taxon_id = id_node.content
        provider_uri = id_node.attribute_value(names.PROVIDER)
    else:
        taxon_id = None
        provider_uri = None

    if rank_name and rank_value:
        link = None
        provider = None
        if taxon_id:
            # (provider URI, provider label, URL prefix the taxon id is appended to)
            known_providers = (
                ("https://www.itis.gov", 'ITIS',
                 'https://itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value='),
                ("https://www.ncbi.nlm.nih.gov/taxonomy", 'NCBI',
                 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id='),
                ("http://www.marinespecies.org", 'WORMS',
                 'http://marinespecies.org/aphia.php?p=taxdetails&id='),
            )
            for uri, label, url_prefix in known_providers:
                if provider_uri == uri:
                    link = f'{url_prefix}{taxon_id}'
                    provider = label
                    break
        hierarchy.append(
            (rank_name, rank_value, common_name, taxon_id, link, provider))

    # Descend into the nested classification, if any.
    nested_node = node.find_child(names.TAXONOMICCLASSIFICATION)
    if nested_node:
        populate_taxonomic_coverage_form_aux(hierarchy, nested_node)
def to_xml(node: Node, level: int = 0) -> str:
    """Serialize a node and its subtree to an indented XML string.

    A node with content is written on one line and its children are not
    emitted. A node without content is written with each child on its own
    line, indented by ``space`` per level. A root node named ``eml`` gets
    the ``eml:`` prefix and the namespace boilerplate.

    Args:
        node: Node to serialize. String content that is not yet escaped is
            escaped in place on the node, as before.
        level: Nesting depth used for indentation; 0 for the root.

    Returns:
        The XML text for ``node``, ending with a newline.
    """
    boiler = ('xmlns:eml="https://eml.ecoinformatics.org/eml-2.2.0" '
              'xmlns:stmml="http://www.xml-cml.org/schema/stmml-1.2" '
              'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
              'xsi:schemaLocation="https://eml.ecoinformatics.org/eml-2.2.0 '
              'https://nis.lternet.edu/schemas/EML/eml-2.2.0/xsd/eml.xsd"')

    name = node.name
    attributes = "".join(
        ' {0}="{1}"'.format(attribute, node.attributes[attribute])
        for attribute in node.attributes
    )
    if level == 0:
        indent = ""
        if name == "eml":
            name = node.name + ":" + node.name
            attributes += " " + boiler
    else:
        indent = space * level

    open_tag = "<" + name + attributes + ">"
    close_tag = "</" + name + ">"
    xml = indent + open_tag

    if node.content is not None:
        if isinstance(node.content, str):
            # If it hasn't been escaped already, escape it. "Already escaped"
            # means an entity is present; testing for the raw characters
            # instead meant escape() only ever ran on text with nothing to
            # escape.
            already_escaped = any(
                entity in node.content for entity in ('&amp;', '&lt;', '&gt;'))
            if not already_escaped:
                node.content = escape(node.content)
                # Hopefully, this is a temporary hack. Need to figure out a
                # better way... <para> tags are treated idiosyncratically
                # because their rules aren't fully supported. They appear
                # within node content, unlike other tags, so escaping is
                # undone for them.
                node.content = node.content.replace(
                    '&lt;para&gt;', '<para>').replace('&lt;/para&gt;', '</para>')
        return xml + str(node.content) + close_tag + "\n"

    if len(node.children) > 0:
        xml += "\n"
        xml += "".join(to_xml(child, level + 1) for child in node.children)
        xml += indent
    return xml + close_tag + "\n"
def prune(n: Node, strict: bool = False) -> list:
    """
    Prune in place all non-valid nodes from the tree

    Args:
        n: Node at the root of the subtree to prune. A node named
            "metadata" and everything below it is left untouched.
        strict: When True, each child that survived its own pruning pass is
            validated again and removed if it raises any MetapypeRuleError.

    Returns:
        List of ``(node, message)`` tuples, one per pruned node

    Side-effects:
        Non-valid nodes are pruned from the tree
    """
    pruned = []
    if n.name == "metadata":
        return pruned

    try:
        node(n)
    except UnknownNodeError as ex:
        # The node itself is unknown: remove it and stop, since its subtree
        # goes with it.
        logger.debug(f"Pruning: {n.name}")
        pruned.append((n, str(ex)))
        if n.parent is not None:
            n.parent.remove_child(n)
        Node.delete_node_instance(n.id)
        return pruned
    except ChildNotAllowedError as ex:
        r = rule.get_rule(n.name)
        # Iterate over a copy because children are removed inside the loop.
        for child in n.children.copy():
            if not r.is_allowed_child(child.name):
                logger.debug(f"Pruning: {child.name}")
                pruned.append((child, str(ex)))
                n.remove_child(child)
                Node.delete_node_instance(child.id)
    except MetapypeRuleError as ex:
        logger.info(ex)

    for child in n.children.copy():
        pruned_below = prune(child, strict)
        pruned += pruned_below
        if not strict:
            continue
        # ``pruned`` holds (node, message) tuples, so a plain membership test
        # against the node never matched. Compare against the node element
        # so a child already removed by the recursive call is not validated
        # and removed a second time.
        if any(entry[0] is child for entry in pruned_below):
            continue
        try:
            node(child)
        except MetapypeRuleError as ex:
            logger.debug(f"Pruning: {child.name}")
            pruned.append((child, str(ex)))
            n.remove_child(child)
            Node.delete_node_instance(child.id)
    return pruned
def populate_geographic_coverage_form(form: GeographicCoverageForm, node: Node):
    """Copy a geographic coverage node's description, bounds and altitudes into the form.

    Each form field is set only when the corresponding node is found;
    otherwise the field is left as it was.

    Args:
        form: Form whose fields are populated in place.
        node: Geographic coverage node to read from.
    """
    description_node = node.find_child(names.GEOGRAPHICDESCRIPTION)
    if description_node:
        form.geographic_description.data = description_node.content

    bounding = names.BOUNDINGCOORDINATES
    altitudes = names.BOUNDINGALTITUDES
    # (form field name, path below the coverage node), looked up in this order.
    field_paths = (
        ('wbc', [bounding, names.WESTBOUNDINGCOORDINATE]),
        ('ebc', [bounding, names.EASTBOUNDINGCOORDINATE]),
        ('nbc', [bounding, names.NORTHBOUNDINGCOORDINATE]),
        ('sbc', [bounding, names.SOUTHBOUNDINGCOORDINATE]),
        ('amin', [bounding, altitudes, names.ALTITUDEMINIMUM]),
        ('amax', [bounding, altitudes, names.ALTITUDEMAXIMUM]),
        ('aunits', [bounding, altitudes, names.ALTITUDEUNITS]),
    )
    for field_name, path in field_paths:
        value_node = node.find_single_node_by_path(path)
        if value_node:
            getattr(form, field_name).data = value_node.content

    form.md5.data = form_md5(form)
def populate_project_form(form: ProjectForm, project_node: Node):
    """Copy a project's title, abstract and funding into the form.

    The abstract and funding nodes are post-processed and then rendered with
    ``display_texttype_node``.

    Args:
        form: Form whose fields are populated in place.
        project_node: Project node; a falsy value yields an empty title and
            renders the abstract and funding from None.
    """
    title = ''
    # Bound up front so the display calls below work without a project node;
    # previously that path raised a NameError.
    abstract_node = None
    funding_node = None

    if project_node:
        title_node = project_node.find_child(names.TITLE)
        if title_node:
            title = title_node.content

        abstract_node = project_node.find_child(names.ABSTRACT)
        post_process_texttype_node(abstract_node)

        funding_node = project_node.find_child(names.FUNDING)
        post_process_texttype_node(funding_node)

    form.title.data = title
    form.abstract.data = display_texttype_node(abstract_node)
    form.funding.data = display_texttype_node(funding_node)
    form.md5.data = form_md5(form)
def populate_temporal_coverage_form(form: TemporalCoverageForm, node: Node):
    """Copy a temporal coverage node's dates into the form.

    A date range fills ``begin_date`` and, when an end date exists,
    ``end_date``. Without a range begin date, a single date-time fills
    ``begin_date`` only.

    Args:
        form: Form whose fields are populated in place.
        node: Temporal coverage node to read from.
    """

    def copy_calendar_date(date_node, field):
        # Date nodes without a calendar date child are skipped instead of
        # failing on None.
        calendar_date_node = date_node.find_child(names.CALENDARDATE)
        if calendar_date_node:
            field.data = calendar_date_node.content

    begin_date_node = node.find_single_node_by_path(
        [names.RANGEOFDATES, names.BEGINDATE])
    if begin_date_node:
        copy_calendar_date(begin_date_node, form.begin_date)
        end_date_node = node.find_single_node_by_path(
            [names.RANGEOFDATES, names.ENDDATE])
        if end_date_node:
            copy_calendar_date(end_date_node, form.end_date)
    else:
        single_date_time_node = node.find_child(names.SINGLEDATETIME)
        if single_date_time_node:
            copy_calendar_date(single_date_time_node, form.begin_date)

    form.md5.data = form_md5(form)
def populate_keyword_form(form: KeywordForm, kw_node: Node):
    """Copy a keyword node's text and ``keywordType`` attribute into the form.

    Args:
        form: Form whose fields are populated in place.
        kw_node: Keyword node; a falsy value leaves both fields empty.
    """
    keyword_text = ''
    type_text = ''

    if kw_node:
        # Falsy content or attribute values are normalised to ''.
        keyword_text = kw_node.content or ''
        type_text = kw_node.attribute_value('keywordType') or ''

    form.keyword.data = keyword_text
    form.keyword_type.data = type_text
    form.md5.data = form_md5(form)
def test_find_descendant(node):
    """find_descendant locates nodes one, two and three levels down."""
    access_node = Node(names.ACCESS)
    node.add_child(access_node)
    found = node.find_descendant(names.ACCESS)
    assert access_node is found

    allow_node = Node(names.ALLOW)
    access_node.add_child(allow_node)
    found = node.find_descendant(names.ALLOW)
    assert found is allow_node

    permission_node = Node(names.PERMISSION)
    allow_node.add_child(permission_node)
    found = node.find_descendant(names.PERMISSION)
    assert found is permission_node

    # A name that is nowhere in the tree yields None.
    found = node.find_descendant("nonesuch")
    assert found is None
def test_find_child(self):
    """find_child on the root is expected to locate nodes at every depth built here."""
    access_node = Node(names.ACCESS)
    self.node.add_child(access_node)
    found = self.node.find_child(names.ACCESS)
    self.assertIs(access_node, found)

    allow_node = Node(names.ALLOW)
    access_node.add_child(allow_node)
    found = self.node.find_child(names.ALLOW)
    self.assertIs(found, allow_node)

    permission_node = Node(names.PERMISSION)
    allow_node.add_child(permission_node)
    found = self.node.find_child(names.PERMISSION)
    self.assertIs(found, permission_node)

    # A name that is nowhere in the tree yields None.
    found = self.node.find_child('nonesuch')
    self.assertIs(found, None)
def from_xml_element(xml_elem, metapype_node, metapype_parent):
    """
    Creates a metapype node corresponding to an xml element.

    Attributes whose name contains '}' are skipped; the element's text, when
    non-empty, becomes the node's content. Child elements are converted
    recursively with the new node as their parent.

    Args:
        xml_elem: the xml element.
        metapype_node: the metapype_node corresponding to that xml element.
                       metapype_node == None, except at the root of the tree.
        metapype_parent: the parent metapype_node for this node.
    """
    current = metapype_node
    if current is None:  # Will be None except at the root
        current = Node(name=xml_elem.tag, parent=metapype_parent)

    for attr_name, attr_value in xml_elem.attrib.items():
        if '}' in attr_name:
            continue
        current.add_attribute(attr_name, attr_value)

    text = xml_elem.text
    if text:
        current.content = text

    if metapype_parent is not None:
        metapype_parent.add_child(current)

    for child_elem in xml_elem:
        from_xml_element(child_elem, None, current)
def test_validate_prune():
    """Pruning removes the nodes that make an otherwise valid tree fail validation."""
    # The TEST_DATA environment variable overrides the default data path.
    xml_path = (os.environ["TEST_DATA"]
                if "TEST_DATA" in os.environ
                else tests.test_data_path)
    with open(f"{xml_path}/eml.xml", "r") as f:
        xml = f.read()

    eml = metapype_io.from_xml(xml)
    assert isinstance(eml, Node)

    # Add two extra children to the dataset so that validation reports errors.
    reference_publication = Node("referencePublication")
    usage_citation = Node("usageCitation")
    dataset = eml.find_single_node_by_path([names.DATASET])
    dataset.add_child(reference_publication)
    dataset.add_child(usage_citation)

    errors_before = []
    validate.tree(eml, errors_before)
    assert len(errors_before) > 0

    validate.prune(eml)

    errors_after = []
    validate.tree(eml, errors_after)
    assert len(errors_after) == 0