def test_delete_appinfo_element_removed_if_exists(self):
    """Deleting an appinfo child that exists leaves an empty ``xs:appinfo``."""
    schema_before = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"> <xs:annotation> <xs:appinfo><attribute>value</attribute></xs:appinfo> </xs:annotation> </xs:element> </xs:schema> """
    schema_wanted = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"> <xs:annotation><xs:appinfo/></xs:annotation> </xs:element> </xs:schema> """

    schema_after = delete_appinfo_element(schema_before, "xs:element", "attribute")

    # Both documents go through the same parse/serialize round trip so the
    # assertion compares XSDTree's serialization, not the literals above.
    actual = XSDTree.tostring(XSDTree.fromstring(schema_after))
    wanted = XSDTree.tostring(XSDTree.fromstring(schema_wanted))
    self.assertEqual(actual, wanted)
def test_add_appinfo_element_present_in_second_of_two_appinfo(self):
    """An element held by the second of two ``xs:appinfo`` nodes is removed.

    NOTE(review): the method name mentions ``add_appinfo_element`` but the
    body exercises ``delete_appinfo_element`` -- confirm which is intended.
    """
    schema_before = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"> <xs:annotation> <xs:appinfo></xs:appinfo> <xs:appinfo><attribute>old</attribute></xs:appinfo> </xs:annotation> </xs:element> </xs:schema> """
    schema_wanted = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"> <xs:annotation><xs:appinfo/><xs:appinfo/></xs:annotation> </xs:element> </xs:schema> """

    schema_after = delete_appinfo_element(schema_before, "xs:element", "attribute")

    # Same parse/serialize round trip on both sides before comparing.
    actual = XSDTree.tostring(XSDTree.fromstring(schema_after))
    wanted = XSDTree.tostring(XSDTree.fromstring(schema_wanted))
    self.assertEqual(actual, wanted)
def test_add_appinfo_element_no_element_adds_it(self):
    """Adding to an empty ``xs:appinfo`` creates the element with its value."""
    schema_before = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"><xs:annotation> <xs:appinfo></xs:appinfo></xs:annotation></xs:element> </xs:schema> """
    schema_wanted = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"> <xs:annotation> <xs:appinfo><attribute>value</attribute></xs:appinfo> </xs:annotation> </xs:element> </xs:schema> """

    schema_after = add_appinfo_element(schema_before, "xs:element", "attribute", "value")

    # Same parse/serialize round trip on both sides before comparing.
    actual = XSDTree.tostring(XSDTree.fromstring(schema_after))
    wanted = XSDTree.tostring(XSDTree.fromstring(schema_wanted))
    self.assertEqual(actual, wanted)
def render_chemical_composition(data_constituents, display_purity, display_error, template_url):
    """ Render the constituents of a composition using the given template

    Args:
        data_constituents: XML fragment (string) listing the constituents; it is
            wrapped in a ``<constituents>`` element before parsing. An empty
            string or ``None`` renders the template without any row.
        display_purity: exposed to the template as ``purity``
        display_error: exposed to the template as ``error``
        template_url: template name handed to ``loader.get_template``

    Returns:
        output of ``template.render`` for the built context
    """
    # tag of a constituent child -> key under which its text reaches the template
    tag_to_key = {
        'element': 'name',
        'quantity': 'quantity',
        'purity': 'purity',
        'error': 'error',
    }

    data = []
    # truthiness (rather than len()) also covers a None payload
    if data_constituents:
        constituents = XSDTree.fromstring("<constituents>" + data_constituents + "</constituents>")
        for constituent in constituents:
            # every field defaults to '' when the tag is absent or has no text
            item = {'name': '', 'quantity': '', 'purity': '', 'error': ''}
            for constituent_element in constituent:
                key = tag_to_key.get(constituent_element.tag)
                if key is not None and constituent_element.text is not None:
                    item[key] = constituent_element.text
            data.append(item)

    context = {
        'purity': display_purity,
        'error': display_error,
        'data': data,
    }
    template = loader.get_template(template_url)
    return template.render(context)
def render_chemical_composition(data_constituents, display_purity, display_error, template_url):
    """Render the constituents of a composition using the given template

    Args:
        data_constituents: XML fragment (string) listing the constituents; it is
            wrapped in a ``<constituents>`` element before parsing. An empty
            string or ``None`` renders the template without any row.
        display_purity: exposed to the template as ``purity``
        display_error: exposed to the template as ``error``
        template_url: template name handed to ``loader.get_template``

    Returns:
        output of ``template.render`` for the built context

    """
    # tag of a constituent child -> key under which its text reaches the template
    tag_to_key = {
        "element": "name",
        "quantity": "quantity",
        "purity": "purity",
        "error": "error",
    }

    data = []
    # truthiness (rather than len()) also covers a None payload
    if data_constituents:
        constituents = XSDTree.fromstring("<constituents>" + data_constituents + "</constituents>")
        for constituent in constituents:
            # every field defaults to "" when the tag is absent or has no text
            item = {"name": "", "quantity": "", "purity": "", "error": ""}
            for constituent_element in constituent:
                key = tag_to_key.get(constituent_element.tag)
                if key is not None and constituent_element.text is not None:
                    item[key] = constituent_element.text
            data.append(item)

    context = {"purity": display_purity, "error": display_error, "data": data}
    template = loader.get_template(template_url)
    return template.render(context)
def parse_data_with_root(self, data):
    """Unescape the data, wrap it in a ``<root>`` element and parse it

    Args:
        data: escaped XML fragment, passed to ``self.parser.unescape``

    Returns:
        tree built by ``XSDTree.fromstring`` from the wrapped fragment

    """
    fragment = self.parser.unescape(data)
    # the fragment is given one enclosing element before being parsed
    wrapped = "<root>" + fragment + "</root>"
    return XSDTree.fromstring(wrapped)
def sanitize(input_value):
    """Recursively sanitize the strings found in the input

    :param input_value: value to clean; lists and dicts are walked recursively
    :return: cleaned list/dict/string, the value itself for int and float,
        the escaped ``str()`` of anything else
    """
    kind = type(input_value)

    # exact type comparison: subclasses end up in the default branch below
    if kind == list:
        return [sanitize(entry) for entry in input_value]

    if kind == dict:
        return {sanitize(key): sanitize(val) for key, val in input_value.items()}

    if kind == str:
        try:
            # normalize the value when it parses as XML (blank text removed)
            blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
            parsed = XSDTree.fromstring(input_value, parser=blank_stripping_parser)
            input_value = XSDTree.tostring(parsed)
        except XMLError as e:
            # not XML: keep the value as it is
            logger.warning("sanitize threw an exception: {0}".format(str(e)))
        finally:
            # NOTE(review): the return is kept inside ``finally`` (presumed
            # original layout), which discards any non-XMLError raised above.
            try:
                # JSON payloads are decoded, sanitized and encoded again
                clean_value = json.dumps(sanitize(json.loads(input_value)))
            except ValueError:
                clean_value = escape(input_value)
            return clean_value

    if kind in (int, float):
        return input_value

    # default: escape the string form of the value
    return escape(str(input_value))
def parse_ontology(ontology):
    """ Build the navigation associated with an ontology

    Args:
        ontology: String/unicode object parsed with ``XSDTree.fromstring``

    Returns:
        result of ``generate_classes`` for the parsed tree
        (an OrderedDict according to the previous docstring -- TODO confirm)
    """
    return generate_classes(XSDTree.fromstring(ontology))
def parse_html(html_text, parent_tag=""):
    """Wrap the HTML in a parent tag and parse it to check it is well formed

    Params:
        html_text: markup to check
        parent_tag: name of the element placed around ``html_text``

    Returns:
        tree built by ``XSDTree.fromstring`` from the wrapped markup

    Raises:
        HTMLError: when building or parsing the wrapped markup fails; carries
            the message of the underlying exception
    """
    try:
        wrapped_html = "<%s>%s</%s>" % (parent_tag, html_text, parent_tag)
        return XSDTree.fromstring(wrapped_html)
    except Exception as e:
        raise HTMLError(str(e))
def get_periodic_table_selected_elements(data_constituents):
    """ Collect the element names found in the xml constituents

    Args:
        data_constituents: XML fragment (string), wrapped in a
            ``<constituents>`` element before parsing

    Returns:
        list with the text of every ``element`` child that has text,
        in document order
    """
    constituents = XSDTree.fromstring("<constituents>" + data_constituents + "</constituents>")
    return [
        child.text
        for constituent in constituents
        for child in constituent
        if child.tag == 'element' and child.text is not None
    ]
def _get_target_namespace(xml_schema):
    """ Return the target namespace of a schema.

    Args:
        xml_schema: XML representation of the schema (text; encoded to UTF-8
            before parsing).

    Returns:
        The ``targetNamespace`` attribute of the root, or the XML Schema
        namespace when the root has no such attribute.

    Raises:
        OAIAPILabelledException: (HTTP 400) when the target namespace is not
            one of the values of the root's namespace map.
    """
    root = XSDTree.fromstring(xml_schema.encode('utf-8')).find(".")

    if 'targetNamespace' not in root.attrib:
        # nothing declared: fall back to the XML Schema namespace
        return "http://www.w3.org/2001/XMLSchema"

    target_namespace = root.attrib['targetNamespace']
    if target_namespace in root.nsmap.values():
        return target_namespace

    message = "The use of a targetNamespace without an associated prefix is not supported."
    raise oai_pmh_exceptions.OAIAPILabelledException(message=message,
                                                     status_code=status.HTTP_400_BAD_REQUEST)
def _replace_include_by_content(xml_tree, include_element, dependency_content):
    """Replace an include by its content

    Args:
        xml_tree: tree whose root receives the top-level elements of the dependency
        include_element: include element, removed from its parent
        dependency_content: XML content of the dependency; nothing is inserted
            when it is None

    Returns:

    """
    if dependency_content is not None:
        # build the tree of the dependency
        dependency_tree = XSDTree.fromstring(dependency_content)
        # list(element) replaces the deprecated getchildren(); taking the
        # snapshot first keeps the iteration independent of the appends
        dependency_elements = list(dependency_tree)
        # append the elements of the dependency to the root of the tree
        xml_tree.getroot().extend(dependency_elements)
    # NOTE(review): the include is removed whether or not content was inserted
    # (layout reconstructed from the flattened source) -- confirm with callers.
    include_element.getparent().remove(include_element)
def _render_module(self, request): # get the xml path of the element on which the module is placed xml_xpath = request.GET.get('xml_xpath', None) # xml path is provided if xml_xpath is not None: try: # create unique field id from xpath field_id = re.sub('[/.:\[\]]', '', xml_xpath) # split the xpath split_xml_xpath = xml_xpath.split('/') # get the last element of the xpath xml_element = split_xml_xpath[-1] # check if namespace is present if ":" in xml_element: # split element name split_xml_element = xml_element.split(":") # only keep element name if namespace is present xml_element = split_xml_element[-1] # get registry template template = template_registry_api.get_current_registry_template( ) # get all refinements for this template refinements = refinement_api.get_all_filtered_by_template_hash( template.hash) # get the refinement for the xml element refinement = refinements.get(xsd_name=xml_element) # initialize reload data for the form reload_form_data = {} # data to reload were provided if self.data != '': # build filed for the refinement form for the current xml element refinement_form_field = "{0}-{1}".format( RefinementForm.prefix, field_id) # get the categories for the current refinement categories = category_api.get_all_filtered_by_refinement_id( refinement.id) # Initialize list of categories id reload_categories_id_list = [] # load list of data to reload from XML reload_data = XSDTree.fromstring("<root>" + self.data + "</root>") # Iterate xml elements for reload_data_element in list(reload_data): try: if len(reload_data_element) > 0: # The xml element to be reloaded is the child element child = reload_data_element[0] # get its value selected_value = child.text # find the corresponding category and add its id to the list reload_categories_id_list.append( categories.get(value=selected_value).id) except Exception, e: raise ModuleError( "Something went wrong when reloading data from XML." 
+ e.message) # set data to reload in the form reload_form_data[ refinement_form_field] = reload_categories_id_list return AbstractModule.render_template( 'core_module_fancy_tree_registry_app/fancy_tree.html', { 'form': RefinementForm(refinement=refinement, field_id=field_id, data=reload_form_data) }) except Exception, e: raise ModuleError( "Something went wrong when rendering the module: " + e.message)
def _load_data_view(node_id, nav_id, data_id, from_tree=True):
    """ Load view for a data, from a tree or a link

    The projection views stored in the navigation node options are processed
    one by one, and an XML string is assembled from the main document and the
    cross-queried documents so that it can be downloaded later.

    NOTE(review): the statement nesting below was reconstructed from a
    flattened source; the code relies on Python 2 semantics (indexing
    ``dict.keys()``, ``unicode``, ``map`` assigned as a value).

    Args:
        node_id: id of the navigation node (looked up directly when
            ``from_tree`` is true, otherwise resolved with ``data_id``)
        nav_id: passed to the ``parser_processview`` calls
        data_id: id of the main document
        from_tree: True when the view is opened from the tree, False from a link

    Returns:
        dict with the keys ``header``, ``type``, ``views`` and ``download``
        (``download`` holds the assembled XML string)

    """
    if not from_tree:
        navigation_node = navigation_operations.get_navigation_node_for_document(
            node_id, data_id)
    else:
        navigation_node = navigation_api.get_by_id(node_id)

    # Initialize parameters in order to download later some information
    xml_document = Data.get_by_id(data_id)
    projection_views = json.loads(navigation_node.options["projection_view"])

    view_data = {
        "header": xml_document.title,
        "type": "leaf",
        "views": [],
        "download": []
    }
    # Initialize parameters in order to download later some information
    # dict of doc_id and queries done of cross documents : {id_doc1: [list of queries1], id_doc2: [list of queries2]}
    dict_id_and_queries_cross_docs = dict()
    # dict of doc_id and queries results for a cross document : {id_doc: [list of queries results]}
    dict_id_and_queryresults_cross_docs = dict()
    # dict of queried parameter and associated result for the queries done on the main doc : { queried parameter: value}
    dict_tags_values_main_doc = dict()

    values_of_items_from_main_doc = []
    list_values_of_items_from_main_doc = []
    # Send the annotation to the processor and collect the data
    for projection_view in projection_views:
        result_data = {"title": projection_view["title"], "data": None}

        # FIXME better handling of x-queries
        # Get info from other doc (without the main queried document)
        if "query" in projection_view.keys():
            doc_projections = []
            # Get the names of the tags that need to be displayed
            for value in projection_view["data"]:
                doc_projections.append(value.get('path'))

            result_data["data"] = parser_processview.process_cross_query(
                nav_id, data_id, projection_view["query"],
                projection_view["data"])
            # Get all the queried documents (without the main queried document)
            queried_docs = parser_processview.ids_docs_to_querys
            for id_doc in queried_docs:
                other_doc_query = {"_id": ObjectId(id_doc)}
                # list of queries done on the current document
                query_list = list()
                # list of queries results done on the current document
                result_list = list()
                for projection in doc_projections:
                    # Get the MongoDB query path for the parameter that need to be displayed
                    # eg: query_path = dict_content.a.b.c.d.e
                    query_path = {
                        doc_projections[doc_projections.index(projection)]: 1
                    }
                    # Get the Data corresponding to the id
                    queried_data = Data.execute_query(other_doc_query).only(
                        query_path.keys()[0])
                    # Add the query to the query list for the current doc
                    query_list.append(query_path.keys()[0].replace(
                        "dict_content.", ""))
                    try:
                        # Get the result of the query
                        result_query = get_projection(queried_data[0])
                        # Add the result of the query to the result list for the current doc
                        result_list.append(str(result_query))
                    except:
                        # NOTE(review): any failure is dropped here, so result_list
                        # can end up shorter than query_list
                        pass
                dict_id_and_queries_cross_docs[id_doc] = query_list
                dict_id_and_queryresults_cross_docs[id_doc] = result_list
        # Get info from main doc
        else:
            # list of queries done on the current document (Main doc)
            query_list = []
            doc_projections = [
                value.get('path') for value in projection_view["data"]
            ]
            query_list = [
                doc_projections[doc_projections.index(projection)]
                for projection in doc_projections
            ]
            # Get all results of the queries. type(result_data["data"]) = dict or instance of dict
            result_data["data"] = parser_processview.processview(
                nav_id, data_id, projection_view["data"])

            for query_path, dict_result in zip(query_list,
                                               result_data["data"]):
                # eg: query_path = a.b.c.d
                # We have only one value as result for the query
                dict_result_value = dict_result.get("value", None)
                if dict_result_value is not None:
                    tag = query_path.split(".")[
                        -1]  # Get only d (the needed tag)
                    if tag in dict_tags_values_main_doc:
                        # a tag seen more than once accumulates its values in a list
                        v = dict_tags_values_main_doc[tag]
                        if isinstance(v, list):
                            dict_tags_values_main_doc[tag].append(
                                dict_result_value)
                        else:
                            dict_tags_values_main_doc[tag] = [
                                dict_tags_values_main_doc[tag],
                                dict_result_value
                            ]
                    else:
                        dict_tags_values_main_doc[tag] = dict_result_value

                # We have multiple values for this result: all the chemical components
                # (dict_result[key] is an inclusion of dicts)
                dict_result_item, dict_result_items = [
                    dict_result.get(_, None) for _ in ["item", "items"]
                ]

                if dict_result_item or dict_result_items:
                    dict_result_item_v = dict_result_item if dict_result_item is not None else dict_result_items
                    #dict_result_item_v = [dict_result_item, dict_result_items][dict_result_item not None]
                    # From the inclusion of dict, process the dict into a list and get all the needed values
                    # values_of_items_from_main_doc = list[list[value1 for dict i,value2 for dict 2, ..]]
                    # eg: values_of_items_from_main_doc= [l1,l2]
                    # l1 = [["location", "NIST"], ["Build location X", "59"], "EWI_Build1"]]
                    # l2 = [["location", "NIST"], ["Build location X", "47"], "EWI_Build2"]]
                    get_values_items(dict_result_item_v,
                                     values_of_items_from_main_doc)
                    for list_of_values in values_of_items_from_main_doc:
                        for value in list_of_values:
                            # We have a list :value= [displayed parameter, value]
                            # eg : ["Build location X", "59"]
                            if len(value) == 2:
                                # list_tag_of_items_from_main_doc.append(value[0])
                                list_values_of_items_from_main_doc.append(
                                    value[1])  # Get the value. eg: 59
                            # We have only one value (last value in the list. eg: EWI_Build1 in l1)
                            else:
                                list_values_of_items_from_main_doc.append(
                                    value)

        view_data["views"].append(result_data)

    # Get the displayed data as an XML format in order to download it later #

    # STEP 1: Build the XML based on initial tags for the crossed documents:
    # Go into the dict of doc_id and queries of cross documents and build the xml for each document
    # dict_id_and_queries_cross_docs = {id_doc1: [list of queries1], id_doc2: [list of queries2]}
    xml_cross_queries_string = ""
    for key in dict_id_and_queries_cross_docs:
        # key = doc_id
        # Get all queries for the current doc_id.
        # eg: query_list = ["a.b.c.d","a.b.c.e","a.b.f.g"]
        query_list = dict_id_and_queries_cross_docs[key]
        # For the doc_id get all the results of the queries done
        # results = ["D","E","G"]
        results = dict_id_and_queryresults_cross_docs[key]
        # Build a xml string for the doc associated to doc_id thanks to the list of queries and the result list
        xml_string = queryNode.tree_to_xml_string(
            queryNode.aggregate_query(query_list, results))
        # NOTE(review): a closing </data> tag is appended before parsing;
        # presumably tree_to_xml_string leaves the <data> element open -- confirm
        xml_object = XSDTree.fromstring(xml_string + "</data>")
        # Add the XML part to create an XML resulting of tag and values of crossed documents
        xml_cross_queries_string += XSDTree.tostring(xml_object, pretty=True)

    # STEP 2: Build the XML for the main document with only the needed tags:
    # Get the Data associated to the main document
    data = Data.get_by_id(data_id)
    # Get the XML content
    file_content = data.xml_content
    xml_main_doc = XSDTree.fromstring(file_content)
    # Transform all the result value into a string to help while testing equality of values with the original XML
    for key, value in dict_tags_values_main_doc.items():
        if isinstance(value, list):
            dict_tags_values_main_doc[key] = map(
                lambda x: x if isinstance(x, unicode) else str(x), value)
        else:
            try:
                dict_tags_values_main_doc[key] = str(value)
            except:
                dict_tags_values_main_doc[key] = u''.join(value).encode(
                    'utf-8')
        # NOTE(review): v is not read before being rebound by the loop further down
        v = dict_tags_values_main_doc[key]

    # Process the XML structure that represents the main document to keep only the needed tags and information
    for child in xml_main_doc.iter():
        # Transform all values into a string
        try:
            text = str(child.text)
        except:
            text = u''.join(child.text).encode('utf-8')
        # If the xml tag is in our dict of tags and values from the main document
        # and its value = dict_tags_values_main_doc[child.tag] we keep the text in the XML structure
        # else we remove the text
        if child.tag in dict_tags_values_main_doc.keys():
            # Fixme
            # if text != str(dict_tags_values_main_doc[child.tag]) or dict_tags_values_main_doc[child.tag] not in text: (caution: does not keep all needed values if we replace by this line)
            if isinstance(dict_tags_values_main_doc[child.tag], list):
                display_value = False
                for value in dict_tags_values_main_doc[child.tag]:
                    if value == text or value in text:
                        display_value = True
                        break
                if not display_value:
                    child.text = ""
            else:
                if text == str(
                        dict_tags_values_main_doc[child.tag]
                ) or dict_tags_values_main_doc[child.tag] in text:
                    pass
                else:
                    child.text = ""
        else:
            # If text is in our list of items of the main doc we keep the value and remove it from our list of items
            if text in list_values_of_items_from_main_doc:
                list_values_of_items_from_main_doc.remove(text)
            else:
                display_text = False
                # v = processed name of the tag as appears in the rendered data after clicking a doc of the tree
                # If this name is in our list of items from the main doc we keep the value (text) in the XML tree
                # else we remove this value
                for v in list_values_of_items_from_main_doc:
                    if v in text:
                        display_text = True
                        break
                if not display_text:
                    child.text = ""

    # same object under a second name: pruning one prunes the other
    xml_f_main_doc = xml_main_doc
    # Remove empty leafs of the tree (all child where child.text="")
    while check_empty_nodes(xml_main_doc):
        remove_empty_nodes(xml_f_main_doc)

    # Build the final XML string result of the main doc and the crossed docs
    xml_main_doc = XSDTree.tostring(xml_f_main_doc, pretty="TRUE")
    xml = xml_main_doc + xml_cross_queries_string
    xml_final = "<xml>\n" + xml + "</xml>"
    xml_final = u''.join(xml_final).encode('utf-8')
    xml_final = str(xml_final)
    view_data["download"] = xml_final
    return view_data
def _render_module(self, request):
    """Render the refinement form of the module for the targeted XML element

    Args:
        request: HTTP request; ``request.GET["xml_xpath"]`` is the xpath of
            the element on which the module is placed

    Returns:
        output of ``AbstractModule.render_template`` for the fancy tree template

    Raises:
        ModuleError: when ``xml_xpath`` is missing from the GET parameters or
            when anything fails while building the form (errors raised while
            reloading data are wrapped a second time by the outer handler)
    """
    # get the xml path of the element on which the module is placed
    xml_xpath = request.GET.get("xml_xpath", None)
    if xml_xpath is None:
        raise ModuleError(
            "xml_xpath was not found in request GET parameters.")

    try:
        # create unique field id from xpath (raw string: "\[" and "\]" are
        # invalid escape sequences in a regular string literal)
        field_id = re.sub(r"[/.:\[\]]", "", xml_xpath)
        # last step of the xpath
        xml_element = xml_xpath.split("/")[-1]
        # only keep the element name if a namespace prefix is present
        if ":" in xml_element:
            xml_element = xml_element.split(":")[-1]

        # get registry template
        template = template_registry_api.get_current_registry_template(
            request=request)
        # get all refinements for this template
        refinements = refinement_api.get_all_filtered_by_template_hash(
            template.hash)
        # get the refinement for the xml element
        refinement = refinements.get(xsd_name=xml_element)

        # initialize reload data for the form
        reload_form_data = {}
        # data to reload were provided
        if self.data != "":
            # build field name of the refinement form for the current xml element
            refinement_form_field = "{0}-{1}".format(
                RefinementForm.prefix, field_id)
            # get the categories for the current refinement
            categories = category_api.get_all_filtered_by_refinement_id(
                refinement.id)
            reload_categories_id_list = []
            # load list of data to reload from XML
            reload_data = XSDTree.fromstring("<root>" + self.data + "</root>")
            for reload_data_element in list(reload_data):
                try:
                    if len(reload_data_element) > 0:
                        # the value to reload is the text of the first child
                        selected_value = reload_data_element[0].text
                        category = categories.get(value=selected_value)
                        # a slug starting with UNSPECIFIED_LABEL is looked up
                        # again with CATEGORY_SUFFIX appended to the value
                        if category.slug.startswith(UNSPECIFIED_LABEL):
                            selected_value += CATEGORY_SUFFIX
                            category = categories.get(value=selected_value)
                        reload_categories_id_list.append(category.id)
                except Exception as e:
                    raise ModuleError(
                        "Something went wrong when reloading data from XML."
                        + str(e))
            # set data to reload in the form
            reload_form_data[refinement_form_field] = reload_categories_id_list

        return AbstractModule.render_template(
            "core_module_fancy_tree_registry_app/fancy_tree.html",
            {
                "form": RefinementForm(
                    refinement=refinement,
                    field_id=field_id,
                    data=reload_form_data,
                )
            },
        )
    except Exception as e:
        raise ModuleError(
            "Something went wrong when rendering the module: " + str(e))
def _retrieve_data(self, request): """Return module"s data Args: request: Returns: """ data = "" if request.method == "GET": if "data" in request.GET and request.GET["data"] != "": xml_table = XSDTree.fromstring("<table>" + request.GET["data"] + "</table>") self.table_name = "name" self.table = {"headers": [], "values": []} headers = xml_table[0] for header in headers.iter("column"): self.table["headers"].append(header.text) values = xml_table[1] for row in values.iter("row"): value_list = [] for data in row.iter("column"): value_list.append(data.text) self.table["values"].append(value_list) data = ExcelUploaderModule.extract_xml_from_table( self.table_name, self.table) elif request.method == "POST": form = ExcelUploaderForm(request.POST, request.FILES) if not form.is_valid(): raise ModuleError( "Data not properly sent to server. Please set 'file' in POST data." ) try: input_excel = request.FILES["file"] book = open_workbook(file_contents=input_excel.read()) sheet = book.sheet_by_index(0) self.table = {"headers": [], "values": []} for row_index in range(sheet.nrows): row_values = [] for col_index in range(sheet.ncols): cell_text = str(sheet.cell(row_index, col_index).value) if row_index == 0: self.table["headers"].append(cell_text) else: row_values.append(cell_text) if len(row_values) != 0: self.table["values"].append(row_values) self.table_name = str(input_excel) except Exception as e: logger.warning("_retrieve_data threw an exception: {0}".format( str(e))) data = ExcelUploaderModule.extract_xml_from_table( self.table_name, self.table) return data
def _retrieve_data(self, request): """ Return module's data Args: request: Returns: """ data = '' if request.method == 'GET': if 'data' in request.GET: xml_table = XSDTree.fromstring("<table>" + request.GET['data'] + "</table>") self.table_name = 'name' self.table = { 'headers': [], 'values': [] } headers = xml_table[0] for header in headers.iter('column'): self.table['headers'].append(header.text) values = xml_table[1] for row in values.iter('row'): value_list = [] for data in row.iter('column'): value_list.append(data.text) self.table['values'].append(value_list) data = ExcelUploaderModule.extract_xml_from_table(self.table_name, self.table) elif request.method == 'POST': form = ExcelUploaderForm(request.POST, request.FILES) if not form.is_valid(): raise ModuleError('Data not properly sent to server. Please set "file" in POST data.') try: input_excel = request.FILES['file'] book = open_workbook(file_contents=input_excel.read()) sheet = book.sheet_by_index(0) self.table = { 'headers': [], 'values': [] } for row_index in range(sheet.nrows): row_values = [] for col_index in range(sheet.ncols): cell_text = sheet.cell(row_index, col_index).value if isinstance(cell_text, unicode): cell_text = unicode(cell_text) else: cell_text = str(cell_text) if row_index == 0: self.table['headers'].append(cell_text) else: row_values.append(cell_text) if len(row_values) != 0: self.table['values'].append(row_values) self.table_name = unicode(input_excel) except: pass data = ExcelUploaderModule.extract_xml_from_table(self.table_name, self.table) return data