def write_tree(self, file_name):
    """
    Clean up, validate and write this instance's XML tree to a file

    Parameters
    ----------
    file_name : str
        Path of the XML file to write

    Returns
    -------
    None
    """
    xml_tree = self.tree

    # Strip the objectify annotations and any unused namespace
    # declarations before the tree is checked and serialized
    objectify.deannotate(xml_tree)
    etree.cleanup_namespaces(xml_tree)

    # The tree must validate against the schema before it is written
    utilities.validate_xml(xml_tree, self.xml_schema_file)

    # Serialize the tree
    xml_tree.write(file_name, pretty_print=True)
def create_report_metadata(self):
    """
    Create the XML file containing metadata to be written into the
    accuracy assessment report

    The document is built from the model region overview and contact
    information (web database) and from the plot data sources, species
    names and ordination variable descriptions (plot database).  It is
    validated against the report metadata schema and then written to
    the parameter parser's ``report_metadata_file``.

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    p = self.parameter_parser

    def add_fields(parent, record, names):
        # For every field name, add a lower-cased child element to
        # parent and assign it the matching value from record
        for name in names:
            child = etree.SubElement(parent, name.lower())
            parent[child.tag] = getattr(record, name)

    # Connect to the lemma web database
    web_db = web_database.WebDatabase(
        p.model_project, p.model_region, p.web_dsn)

    # Create the XML
    xml_schema_file = \
        'http://lemma.forestry.oregonstate.edu/xml/report_metadata.xsd'
    root_str = """
        <report_metadata
            xmlns:xsi="%s"
            xsi:noNamespaceSchemaLocation="%s"/>
    """
    root_str = root_str % (
        'http://www.w3.org/2001/XMLSchema-instance',
        xml_schema_file
    )
    root_elem = objectify.fromstring(root_str)

    # Get the model region overview (only the first record is used)
    mr_overview = web_db.get_model_region_info()
    overview_elem = etree.SubElement(root_elem, 'overview')
    add_fields(overview_elem, mr_overview[0], mr_overview.dtype.names)

    # Get contact info for people associated with this project
    people_info = web_db.get_people_info()
    people_elem = etree.SubElement(root_elem, 'contact_information')
    for person in people_info:
        person_elem = etree.SubElement(people_elem, 'contact')
        add_fields(person_elem, person, people_info.dtype.names)

    # Store list of plot IDs into a string if this variable hasn't
    # yet been created
    if not hasattr(self, 'id_str'):
        self.id_str = self._get_id_string()

    # Subset the string of plot IDs to thin to one plot at a
    # location just for locations that have the exact same spectral
    # values for all plot measurements (i.e. places where the
    # imagery has been stabilized)
    delete_list = self.plot_db.get_duplicate_plots_to_remove(self.id_str)
    if len(delete_list) > 0:
        id_list_subset = [int(x) for x in self.id_str.split(",")]
        for plot_id in delete_list:
            try:
                id_list_subset.remove(plot_id)
            # if the ID is not in the list, go on to the next ID
            except ValueError:
                continue
        # turn subsetted id_list into a string
        id_str_subset = ','.join(map(str, id_list_subset))
    else:
        id_str_subset = self.id_str

    # Get the plot data sources
    data_sources = self.plot_db.get_plot_data_source_summary(id_str_subset)
    data_sources_elem = etree.SubElement(root_elem, 'plot_data_sources')

    # Create subelements for each unique plot data source
    for ds in np.unique(data_sources.DATA_SOURCE):
        data_source_elem = \
            etree.SubElement(data_sources_elem, 'plot_data_source')

        # All summary records belonging to this data source; used for
        # both the description and the per-year plot counts
        recs = data_sources[np.where(data_sources.DATA_SOURCE == ds)]

        child = etree.SubElement(data_source_elem, 'data_source')
        data_source_elem[child.tag] = ds

        child = etree.SubElement(data_source_elem, 'description')
        description = np.unique(recs)
        data_source_elem[child.tag] = description['DESCRIPTION'][0]

        # Create subelements for each plot assessment year for
        # this data source
        years_elem = etree.SubElement(data_source_elem, 'assessment_years')
        for rec in recs:
            year_elem = etree.SubElement(years_elem, 'year')
            add_fields(year_elem, rec, ('ASSESSMENT_YEAR', 'PLOT_COUNT'))

    # Get the species scientific and common names
    # (queried with the full ID string, not the thinned subset)
    species_names = \
        self.plot_db.get_species_names(self.id_str, p.lump_table)
    species_names_elem = etree.SubElement(root_elem, 'species_names')
    for species_name in species_names:
        species_name_elem = etree.SubElement(species_names_elem, 'species')
        add_fields(
            species_name_elem, species_name, species_names.dtype.names)

    # Get the ordination variable descriptions
    ordination_vars = ','.join(p.get_ordination_variable_names())
    ordination_descr = \
        self.plot_db.get_ordination_variable_descriptions(ordination_vars)
    ord_vars_elem = etree.SubElement(root_elem, 'ordination_variables')
    for ord_var in ordination_descr:
        ord_var_elem = \
            etree.SubElement(ord_vars_elem, 'ordination_variable')
        add_fields(ord_var_elem, ord_var, ordination_descr.dtype.names)

    tree = root_elem.getroottree()
    objectify.deannotate(tree)
    etree.cleanup_namespaces(tree)

    # Ensure that this tree validates against the schema file
    utilities.validate_xml(tree, xml_schema_file)

    # Write XML to file
    report_metadata_file = p.report_metadata_file
    aa_dir = os.path.dirname(report_metadata_file)
    # dirname is '' when the report file is a bare file name and
    # os.makedirs('') raises, so only create a directory when one
    # is actually named
    if aa_dir and not os.path.exists(aa_dir):
        os.makedirs(aa_dir)
    tree.write(report_metadata_file, pretty_print=True)
def create_attribute_metadata(self, field_names):
    """
    Create the attribute metadata based on the field_names parameter

    Each field name is looked up first in the structure metadata and
    then in the species metadata.  Names found in neither are reported
    and skipped.  The resulting XML is validated against the stand
    attributes schema and written to the parameter parser's
    ``stand_metadata_file``.

    Parameters
    ----------
    field_names: list
        Field names for which to get metadata

    Returns
    -------
    None
    """
    p = self.parameter_parser

    # Get the metadata associated with the attribute data
    structure_fields, structure_codes = \
        self.plot_db.get_structure_metadata(p.model_project)
    species_fields = self.plot_db.get_species_metadata()

    # Create the metadata XML
    xml_schema_file = \
        'http://lemma.forestry.oregonstate.edu/xml/stand_attributes.xsd'
    root_str = """
        <attributes
            xmlns:xsi="%s"
            xsi:noNamespaceSchemaLocation="%s"/>
    """
    root_str = root_str % (
        'http://www.w3.org/2001/XMLSchema-instance',
        xml_schema_file
    )
    root_elem = objectify.fromstring(root_str)

    # Metadata common to both structure and species recarrays
    common_fields = (
        'FIELD_NAME', 'FIELD_TYPE', 'UNITS', 'DESCRIPTION',
        'SHORT_DESCRIPTION')
    # Fields whose values differ between structure and species
    special_fields = ('SPECIES_ATTR', 'PROJECT_ATTR', 'ACCURACY_ATTR')
    code_fields = ('CODE_VALUE', 'DESCRIPTION', 'LABEL')

    for n in field_names:
        n = n.upper()
        other_fields = {}
        try:
            # Indexing [0] on an empty selection raises IndexError,
            # which signals "not a structure attribute"
            r = structure_fields[structure_fields.FIELD_NAME == n][0]
            other_fields['SPECIES_ATTR'] = 0
            other_fields['PROJECT_ATTR'] = r.PROJECT_ATTR
            other_fields['ACCURACY_ATTR'] = r.ACCURACY_ATTR
        except IndexError:
            try:
                r = species_fields[species_fields.FIELD_NAME == n][0]
                other_fields['SPECIES_ATTR'] = 1
                other_fields['PROJECT_ATTR'] = 1
                other_fields['ACCURACY_ATTR'] = 1
            except IndexError:
                err_msg = n + ' has no metadata'
                print(err_msg)
                continue

        # Add the attribute element
        attribute_elem = etree.SubElement(root_elem, 'attribute')

        # Add all metadata common to both structure and species recarrays
        for f in common_fields:
            child = etree.SubElement(attribute_elem, f.lower())
            attribute_elem[child.tag] = getattr(r, f)

        # Add special fields customized for structure and species
        for f in special_fields:
            child = etree.SubElement(attribute_elem, f.lower())
            attribute_elem[child.tag] = other_fields[f]

        # Print out codes if they exist.  The explicit comparison is
        # kept on purpose: the type of CODED is not visible here and
        # plain truthiness could accept values this test rejects.
        if r.CODED == True:  # noqa: E712
            codes_elem = etree.SubElement(attribute_elem, 'codes')
            try:
                c_records = \
                    structure_codes[structure_codes.FIELD_NAME == n]
            except IndexError:
                c_records = ()

            # A boolean mask that matches nothing yields an empty
            # selection rather than an IndexError, so the missing-codes
            # case has to be detected explicitly
            if len(c_records) == 0:
                err_msg = 'Codes were not found for ' + n
                print(err_msg)
                continue

            for c_rec in c_records:
                code_elem = etree.SubElement(codes_elem, 'code')
                for c in code_fields:
                    child = etree.SubElement(code_elem, c.lower())
                    code_elem[child.tag] = getattr(c_rec, c)

    tree = root_elem.getroottree()
    objectify.deannotate(tree)
    etree.cleanup_namespaces(tree)

    # Ensure that this tree validates against the schema file
    utilities.validate_xml(tree, xml_schema_file)

    # Write out this metadata file
    metadata_file = p.stand_metadata_file
    tree.write(metadata_file, pretty_print=True)
def create_report_metadata(self):
    """
    Create the XML file containing metadata to be written into the
    accuracy assessment report

    The document is built from the model region overview and contact
    information (web database) and from the plot data sources, species
    names and ordination variable descriptions (plot database).  It is
    validated against the report metadata schema and then written to
    the parameter parser's ``report_metadata_file``.

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    p = self.parameter_parser

    def add_fields(parent, record, names):
        # For every field name, add a lower-cased child element to
        # parent and assign it the matching value from record
        for name in names:
            child = etree.SubElement(parent, name.lower())
            parent[child.tag] = getattr(record, name)

    # Connect to the lemma web database
    web_db = web_database.WebDatabase(
        p.model_project, p.model_region, p.web_dsn)

    # Create the XML
    xml_schema_file = \
        'http://lemma.forestry.oregonstate.edu/xml/report_metadata.xsd'
    root_str = """
        <report_metadata
            xmlns:xsi="%s"
            xsi:noNamespaceSchemaLocation="%s"/>
    """
    root_str = root_str % (
        'http://www.w3.org/2001/XMLSchema-instance',
        xml_schema_file
    )
    root_elem = objectify.fromstring(root_str)

    # Get the model region overview (only the first record is used)
    mr_overview = web_db.get_model_region_info()
    overview_elem = etree.SubElement(root_elem, 'overview')
    add_fields(overview_elem, mr_overview[0], mr_overview.dtype.names)

    # Get contact info for people associated with this project
    people_info = web_db.get_people_info()
    people_elem = etree.SubElement(root_elem, 'contact_information')
    for person in people_info:
        person_elem = etree.SubElement(people_elem, 'contact')
        add_fields(person_elem, person, people_info.dtype.names)

    # Store list of plot IDs into a string if this variable hasn't
    # yet been created
    if not hasattr(self, 'id_str'):
        self.id_str = self._get_id_string()

    # Subset the string of plot IDs to thin to one plot at a
    # location just for locations that have the exact same spectral
    # values for all plot measurements (i.e. places where the
    # imagery has been stabilized)
    delete_list = self.plot_db.get_duplicate_plots_to_remove(self.id_str)
    if len(delete_list) > 0:
        id_list_subset = [int(x) for x in self.id_str.split(",")]
        for plot_id in delete_list:
            try:
                id_list_subset.remove(plot_id)
            # if the ID is not in the list, go on to the next ID
            except ValueError:
                continue
        # turn subsetted id_list into a string
        id_str_subset = ','.join(map(str, id_list_subset))
    else:
        id_str_subset = self.id_str

    # Get the plot data sources
    data_sources = self.plot_db.get_plot_data_source_summary(id_str_subset)
    data_sources_elem = etree.SubElement(root_elem, 'plot_data_sources')

    # Create subelements for each unique plot data source
    for ds in np.unique(data_sources.DATA_SOURCE):
        data_source_elem = \
            etree.SubElement(data_sources_elem, 'plot_data_source')

        # All summary records belonging to this data source; used for
        # both the description and the per-year plot counts
        recs = data_sources[np.where(data_sources.DATA_SOURCE == ds)]

        child = etree.SubElement(data_source_elem, 'data_source')
        data_source_elem[child.tag] = ds

        child = etree.SubElement(data_source_elem, 'description')
        description = np.unique(recs)
        data_source_elem[child.tag] = description['DESCRIPTION'][0]

        # Create subelements for each plot assessment year for
        # this data source
        years_elem = etree.SubElement(data_source_elem, 'assessment_years')
        for rec in recs:
            year_elem = etree.SubElement(years_elem, 'year')
            add_fields(year_elem, rec, ('ASSESSMENT_YEAR', 'PLOT_COUNT'))

    # Get the species scientific and common names
    # (queried with the full ID string, not the thinned subset)
    species_names = \
        self.plot_db.get_species_names(self.id_str, p.lump_table)
    species_names_elem = etree.SubElement(root_elem, 'species_names')
    for species_name in species_names:
        species_name_elem = etree.SubElement(species_names_elem, 'species')
        add_fields(
            species_name_elem, species_name, species_names.dtype.names)

    # Get the ordination variable descriptions
    ordination_vars = ','.join(p.get_ordination_variable_names())
    ordination_descr = \
        self.plot_db.get_ordination_variable_descriptions(ordination_vars)
    ord_vars_elem = etree.SubElement(root_elem, 'ordination_variables')
    for ord_var in ordination_descr:
        ord_var_elem = \
            etree.SubElement(ord_vars_elem, 'ordination_variable')
        add_fields(ord_var_elem, ord_var, ordination_descr.dtype.names)

    tree = root_elem.getroottree()
    objectify.deannotate(tree)
    etree.cleanup_namespaces(tree)

    # Ensure that this tree validates against the schema file
    utilities.validate_xml(tree, xml_schema_file)

    # Write XML to file
    report_metadata_file = p.report_metadata_file
    aa_dir = os.path.dirname(report_metadata_file)
    # dirname is '' when the report file is a bare file name and
    # os.makedirs('') raises, so only create a directory when one
    # is actually named
    if aa_dir and not os.path.exists(aa_dir):
        os.makedirs(aa_dir)
    tree.write(report_metadata_file, pretty_print=True)
def create_attribute_metadata(self, field_names):
    """
    Create the attribute metadata based on the field_names parameter

    Each field name is looked up first in the structure metadata and
    then in the species metadata.  Names found in neither are reported
    and skipped.  The resulting XML is validated against the stand
    attributes schema and written to the parameter parser's
    ``stand_metadata_file``.

    Parameters
    ----------
    field_names: list
        Field names for which to get metadata

    Returns
    -------
    None
    """
    p = self.parameter_parser

    # Get the metadata associated with the attribute data
    structure_fields, structure_codes = \
        self.plot_db.get_structure_metadata(p.model_project)
    species_fields = self.plot_db.get_species_metadata()

    # Create the metadata XML
    xml_schema_file = \
        'http://lemma.forestry.oregonstate.edu/xml/stand_attributes.xsd'
    root_str = """
        <attributes
            xmlns:xsi="%s"
            xsi:noNamespaceSchemaLocation="%s"/>
    """
    root_str = root_str % (
        'http://www.w3.org/2001/XMLSchema-instance',
        xml_schema_file
    )
    root_elem = objectify.fromstring(root_str)

    # Metadata common to both structure and species recarrays
    common_fields = (
        'FIELD_NAME', 'FIELD_TYPE', 'UNITS', 'DESCRIPTION',
        'SHORT_DESCRIPTION')
    # Fields whose values differ between structure and species
    special_fields = ('SPECIES_ATTR', 'PROJECT_ATTR', 'ACCURACY_ATTR')
    code_fields = ('CODE_VALUE', 'DESCRIPTION', 'LABEL')

    for n in field_names:
        n = n.upper()
        other_fields = {}
        try:
            # Indexing [0] on an empty selection raises IndexError,
            # which signals "not a structure attribute"
            r = structure_fields[structure_fields.FIELD_NAME == n][0]
            other_fields['SPECIES_ATTR'] = 0
            other_fields['PROJECT_ATTR'] = r.PROJECT_ATTR
            other_fields['ACCURACY_ATTR'] = r.ACCURACY_ATTR
        except IndexError:
            try:
                r = species_fields[species_fields.FIELD_NAME == n][0]
                other_fields['SPECIES_ATTR'] = 1
                other_fields['PROJECT_ATTR'] = 1
                other_fields['ACCURACY_ATTR'] = 1
            except IndexError:
                err_msg = n + ' has no metadata'
                print(err_msg)
                continue

        # Add the attribute element
        attribute_elem = etree.SubElement(root_elem, 'attribute')

        # Add all metadata common to both structure and species recarrays
        for f in common_fields:
            child = etree.SubElement(attribute_elem, f.lower())
            attribute_elem[child.tag] = getattr(r, f)

        # Add special fields customized for structure and species
        for f in special_fields:
            child = etree.SubElement(attribute_elem, f.lower())
            attribute_elem[child.tag] = other_fields[f]

        # Print out codes if they exist.  The explicit comparison is
        # kept on purpose: the type of CODED is not visible here and
        # plain truthiness could accept values this test rejects.
        if r.CODED == True:  # noqa: E712
            codes_elem = etree.SubElement(attribute_elem, 'codes')
            try:
                c_records = \
                    structure_codes[structure_codes.FIELD_NAME == n]
            except IndexError:
                c_records = ()

            # A boolean mask that matches nothing yields an empty
            # selection rather than an IndexError, so the missing-codes
            # case has to be detected explicitly
            if len(c_records) == 0:
                err_msg = 'Codes were not found for ' + n
                print(err_msg)
                continue

            for c_rec in c_records:
                code_elem = etree.SubElement(codes_elem, 'code')
                for c in code_fields:
                    child = etree.SubElement(code_elem, c.lower())
                    code_elem[child.tag] = getattr(c_rec, c)

    tree = root_elem.getroottree()
    objectify.deannotate(tree)
    etree.cleanup_namespaces(tree)

    # Ensure that this tree validates against the schema file
    utilities.validate_xml(tree, xml_schema_file)

    # Write out this metadata file
    metadata_file = p.stand_metadata_file
    tree.write(metadata_file, pretty_print=True)
def create_model_xml(self, model_directory, model_region, model_year):
    """
    Specialize this prototype for one model directory, model region
    and model year

    All changes are made on a deep copy of this instance; the copy's
    tree is validated against this instance's schema file before it
    is returned.

    Parameters
    ----------
    model_directory : str
        Model directory for this model
    model_region : int
        Modeling region with which to specialize this XML
    model_year : int
        Year (4-digit) with which to specialize this XML

    Returns
    -------
    specialized : deep copy of this instance
        The copy holding the specialized XML, ready to be serialized
    """
    specialized = deepcopy(self)

    # The specialized copy is always a 'FULL' parameter set
    specialized.parameter_set = 'FULL'

    # Fill in the values this copy is being specialized for
    specialized.model_directory = model_directory
    specialized.model_region = model_region
    specialized.model_year = model_year

    # The PlotDatabase instance supplies many of the remaining elements.
    # Region and year come from the specialized copy; every other
    # argument comes from the prototype.
    plot_db = plot_database.PlotDatabase(
        self.model_type,
        specialized.model_region,
        self.buffer,
        specialized.model_year,
        self.summary_level,
        self.image_source,
        self.image_version,
        dsn=self.plot_dsn,
    )

    # Model boundary_raster and region extent
    window = plot_db.get_model_region_window()[0]
    specialized.boundary_raster = window.BOUNDARY_RASTER
    specialized.envelope = [
        window.X_MIN, window.Y_MIN, window.X_MAX, window.Y_MAX]

    # Plot image crosswalk
    #
    # A non-empty string in the prototype's <plot_image_crosswalk> block
    # is treated as a keyword: it is handed to the database, which
    # returns the plot/image pairs to store on the copy.  In every other
    # case the plot_years tag is populated from the database instead.
    crosswalk_keyword = self.plot_image_crosswalk
    if crosswalk_keyword and isinstance(crosswalk_keyword, str):
        specialized.plot_image_crosswalk = \
            plot_db.get_plot_image_pairs(crosswalk_keyword)
    else:
        specialized.plot_years = plot_db.get_plot_years()

    # Ordination variables
    #
    # A non-empty string here is likewise a keyword; the database turns
    # it (together with the variable filter) into the list of variables.
    # Anything else means the variables were already specified.
    ordination_keyword = self.get_ordination_variables()
    if ordination_keyword and isinstance(ordination_keyword, str):
        specialized.set_ordination_variables(
            plot_db.get_ordination_variable_list(
                ordination_keyword, self.variable_filter))

    # Accuracy assessment report name
    if self.accuracy_assessment_report:
        name_parts = (
            'mr' + str(specialized.model_region),
            self.model_type,
            str(specialized.model_year),
            'aa',
        )
        specialized.accuracy_assessment_report = \
            '_'.join(name_parts) + '.pdf'

    # Deannotate the tree
    specialized_tree = specialized.tree
    objectify.deannotate(specialized_tree)
    etree.cleanup_namespaces(specialized_tree)

    # Ensure the newly created XML validates against the schema
    utilities.validate_xml(specialized_tree, self.xml_schema_file)

    # Hand back the specialized copy for serializing
    return specialized
def validate(self):
    """
    Validate this instance's XML tree against its XML schema file

    The check is delegated to utilities.validate_xml; per the original
    note, a tree that does not validate raises an exception.

    Returns
    -------
    None
    """
    xml_tree = self.xml_tree
    schema_file = self.xml_schema_file
    utilities.validate_xml(xml_tree, schema_file)