def __init__(self, onixfile, profile):
    if hasattr(onixfile, 'name'):
        self.basename = basename(onixfile.name)
    else:
        self.basename = basename(onixfile)
    if hasattr(onixfile, 'seek'):
        onixfile.seek(0)
    self._onix_file = OnixFile(onixfile)
    self.onix = self._onix_file.xml_tree().getroot()
    # Get rid of namespaces for easier xpath search
    for elem in self.onix.getiterator():
        if not hasattr(elem.tag, 'find'):
            continue
        i = elem.tag.find('}')
        if i >= 0:
            elem.tag = elem.tag[i + 1:]
    objectify.deannotate(self.onix, cleanup_namespaces=True)
    self.spec = yaml.load(open(profile, 'rb'))
    name, ext = splitext(basename(profile))
    self.name = name.upper()
    self.urls_checked = set()
    self.errors = []
    self.warnings = []
def build_xml(entries, with_attributes=True):
    root = objectify.Element('cityindex')
    root.title = 'Index of major German cities'
    for table_entry in entries:
        if len(table_entry) != 11:
            log("Invalid entry, expected 11 items, got %d: %s" % (len(table_entry), table_entry))
            continue
        name, ew1980, ew1990, ew2000, ew2009, ew2010, area, ewkm, ch0010, first, region = table_entry
        entry = etree.SubElement(root, 'entry')
        entry.city = name
        entry.region = region
        entry.country = 'Deutschland'
        entry.area_km2 = area
        if with_attributes:
            entry.inhabitants = [ew1980, ew1990, ew2000, ew2009, ew2010]
            entry.inhabitants[0].set('year', '1980')
            entry.inhabitants[1].set('year', '1990')
            entry.inhabitants[2].set('year', '2000')
            entry.inhabitants[3].set('year', '2009')
            entry.inhabitants[4].set('year', '2010')
        else:
            entry.inhabitants = ew2010
        entry.inhabitants_per_km2 = ewkm
        entry.development_2000_2010 = ch0010.replace(u'\u2212', '-')  # fix minus sign
        entry.major_since = first
        entry.description = u'%s ist eine deutsche Großstadt.' % name
    objectify.deannotate(root)
    etree.cleanup_namespaces(root)
    return root
def bdml_export(beam_list, filename, progress=None):
    """Accepts a list of beam objects and saves it to filename in BDML format"""
    # Is beam_list a list of beam objects or tuples? (Model passes list of tuples)
    if isinstance(beam_list[0], tuple):
        beam_list = [j for i in beam_list for j in i if isinstance(j, Beam)]
    NSMAP = {'xsi': "http://www.w3.org/2001/XMLSchema-instance"}
    xml_tree = etree.ElementTree(objectify.Element("{http://www.radpy.org}BDML", nsmap=NSMAP))
    xml_root = xml_tree.getroot()
    xml_root.set("{http://www.w3.org/2001/XMLSchema-instance}schemaLocation",
                 "http://www.radpy.org/BDML/BDML.xsd")
    for value, i in enumerate(beam_list):
        if progress:
            progress.setValue(value)
        temp = etree.SubElement(xml_root, "{http://www.radpy.org}Beam")
        xml_root.Beam[-1] = i.exportXML()
    # Get rid of objectify namespace
    objectify.deannotate(xml_tree)
    etree.cleanup_namespaces(xml_tree)
    file = open(filename, 'w')
    xml_tree.write(file, pretty_print=True)
    file.close()
def clean_namespaces(root):
    for elem in root.getiterator():
        i = elem.tag.find('}')
        if i >= 0:
            elem.tag = elem.tag[i+1:]
    objectify.deannotate(root, cleanup_namespaces=True)
    return root
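# Minimal usage sketch (assumes lxml is installed and clean_namespaces() above
# is importable; 'example.xml' is a hypothetical namespaced input file).
from lxml import objectify

root = objectify.parse('example.xml').getroot()
root = clean_namespaces(root)
# after stripping prefixes, plain-tag xpath works without a namespace map
entries = root.findall('.//entry')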
def parseXML(xmlFile):
    """Parse the XML file"""
    with open(xmlFile) as f:
        xml = f.read()
    root = objectify.fromstring(xml)
    # returns attributes in element node as dict
    attrib = root.attrib
    # how to extract element data
    begin = root.appointment.begin
    uid = root.appointment.uid
    # loop over elements and print their tags and text
    for e in root.appointment.iterchildren():
        print("%s => %s" % (e.tag, e.text))
    # how to change an element's text
    root.appointment.begin = "something else"
    print(root.appointment.begin)
    # how to add a new element
    root.appointment.new_element = "new data"
    # remove the py:pytype stuff
    objectify.deannotate(root)
    etree.cleanup_namespaces(root)
    obj_xml = etree.tostring(root, pretty_print=True)
    print(obj_xml)
    # save your xml
    with open("new.xml", "w") as f:
        f.write(obj_xml)
def new_tbl(rows, cols, width, height, tableStyleId=None):
    """Return a new ``<p:tbl>`` element tree"""
    # working hypothesis is this is the default table style GUID
    if tableStyleId is None:
        tableStyleId = '{5C22544A-7EE6-4342-B048-85BDC9FD1C3A}'
    xml = CT_Table._tbl_tmpl % (tableStyleId)
    tbl = parse_xml_bytes(xml)
    # add specified number of rows and columns
    rowheight = height/rows
    colwidth = width/cols
    for col in range(cols):
        # adjust width of last col to absorb any div error
        if col == cols-1:
            colwidth = width - ((cols-1) * colwidth)
        SubElement(tbl.tblGrid, 'a:gridCol', w=str(colwidth))
    for row in range(rows):
        # adjust height of last row to absorb any div error
        if row == rows-1:
            rowheight = height - ((rows-1) * rowheight)
        tr = SubElement(tbl, 'a:tr', h=str(rowheight))
        for col in range(cols):
            tr.append(CT_TableCell.new_tc())
    objectify.deannotate(tbl, cleanup_namespaces=True)
    return tbl
def parse_with_region(person_list_file="../personlist.xml", city_index_file="../cityindex.xml"):
    """ Augment document while parsing. """
    tree = objectify.parse(person_list_file)
    city_index = objectify.parse(city_index_file).getroot().entry
    person_list = tree.getroot()
    region_element = person_list.makeelement("region")
    for person in person_list.person:
        # find city and country of each person
        city = person.address.city
        country = person.address.country
        region = region_element
        for entry in city_index:
            if entry.city == city and entry.country == country:
                region = entry.region
                break
        # insert region tag after city tag
        city.addnext(deepcopy(region))
        # change birth date to April 1st if born in December
        if person.birthday.month == "December":
            birthday = person.birthday
            birthday.day = 1
            birthday.month = "April"
            birthday.month.set("number", "4")
    # return processed tree
    objectify.deannotate(tree)
    etree.cleanup_namespaces(tree)
    return tree
def new_placeholder_sp(id_, name, ph_type, orient, sz, idx):
    """
    Return a new ``<p:sp>`` element tree configured as a placeholder shape.
    """
    xml = CT_Shape._ph_sp_tmpl % (id_, name)
    sp = parse_xml_bytes(xml)
    # placeholder shapes get a "no group" lock
    SubElement(sp.nvSpPr.cNvSpPr, 'a:spLocks')
    sp.nvSpPr.cNvSpPr[qn('a:spLocks')].set('noGrp', '1')
    # placeholder (ph) element attribute values vary by type
    ph = SubElement(sp.nvSpPr.nvPr, 'p:ph')
    if ph_type != ST_PlaceholderType.OBJ:
        ph.set('type', ph_type)
    if orient != ST_Direction.HORZ:
        ph.set('orient', orient)
    if sz != ST_PlaceholderSize.FULL:
        ph.set('sz', sz)
    if idx != 0:
        ph.set('idx', str(idx))
    placeholder_types_that_have_a_text_frame = (
        ST_PlaceholderType.TITLE, ST_PlaceholderType.CTR_TITLE,
        ST_PlaceholderType.SUB_TITLE, ST_PlaceholderType.BODY,
        ST_PlaceholderType.OBJ)
    if ph_type in placeholder_types_that_have_a_text_frame:
        sp.append(CT_TextBody.new_txBody())
    objectify.deannotate(sp, cleanup_namespaces=True)
    return sp
def get_events_from_xml(self, file):
    """
    Reads a Momentum audit.xml file, removes the namespaces, and simplifies attribute names.
    :param file:
    :return:
    """
    root = etree.parse(file)
    for elem in root.getiterator():
        i = elem.tag.find('}')
        if i >= 0:
            elem.tag = elem.tag[i+1:]
    objectify.deannotate(root, pytype=True, xsi=True, xsi_nil=True, cleanup_namespaces=True)
    # This strips off the part before the dash `-` from all attribute names.
    # Makes it much easier to parse and keeps the code cleaner.
    for elem in root.getiterator():
        if 'Type' in elem.attrib:
            elem.attrib['Type'] = elem.attrib['Type'][:-5]
        for attr, val in elem.attrib.iteritems():
            i = attr.find('-')
            if i >= 0:
                elem.attrib[attr[i+1:]] = val
                del elem.attrib[attr]
    return root
def parse_rules(filename):
    '''Extract rule information from UniRules in a file.'''
    with open(filename, 'rb') as data:
        logging.info('Starting work on file: {}'.format(filename))
        xml = data.read()
        root = objectify.fromstring(xml)
        objectify.deannotate(root, cleanup_namespaces=True)
        for rule in root.unirule:
            rule_id = rule.attrib['id']
            logging.info('Parsing rule: {}'.format(rule_id))
            uni = UniRule()
            logging.info('Extracting meta from: {}'.format(rule_id))
            extract_meta(rule, uni)
            logging.info('Extracting conditions from: {}'.format(rule_id))
            extract_main_conditions(rule, uni)
            logging.info('Extracting annotations from: {}'.format(rule_id))
            extract_main_annotations(rule, uni)
            try:
                for case in rule.cases.case:
                    logging.info('Found a case.')
                    basic_rule = BasicRule()
                    uni.cases.append(basic_rule)
                    extract_case_conditions(case, uni)
                    extract_case_annotations(case, uni)
            except AttributeError:
                logging.info('Rule appears to have no cases: {}'.format(rule_id))
            yield uni
def test_password_callback(self):
    """
    `dataloadtool_export()` should use the password callback.
    """
    # One member with a password, and one without.
    m1 = self.create_member_partial(commit=False)
    m2 = self.create_member_partial(commit=False)
    m1.gender = 'F'
    m2.gender = 'M'
    m1.save()
    m2.save()
    expected = self.expected_consumers([m1, m2])
    expected.Consumer[0].UserAccount.LoginCredentials.Password = '******'
    del expected.Consumer[1].UserAccount.LoginCredentials.Password
    objectify.deannotate(expected, cleanup_namespaces=True)

    def mock_password(given_member):
        # XXX: Unsaved objects compare equal by default, so lookup by id instead.
        passwords = {m1.username: '******', m2.username: None}
        self.assertIn(given_member.username, passwords,
                      'Called with unexpected member: {0!r}'.format(given_member))
        return passwords[given_member.username]

    consumers = self._dataloadtool_export(
        Member.objects.filter(pk__in=(m1.pk, m2.pk)),
        password_callback=mock_password)
    self.assertEqual(objectify.dump(expected), objectify.dump(consumers))
def change_catalog_owner(self, catalog_name, user_name):
    """Change the ownership of Catalog to a given user

    :param catalog_name: Catalog whose ownership needs to be changed
    :param user_name: New Owner of the Catalog

    :return: None
    """
    if self.resource is None:
        self.resource = self.client.get_resource(self.href)
    catalog_resource = self.get_catalog_resource(
        catalog_name, is_admin_operation=True)
    owner_link = find_link(
        catalog_resource,
        rel=RelationType.DOWN,
        media_type=EntityType.OWNER.value,
        fail_if_absent=True)
    catalog_href = owner_link.href
    user_resource = self.get_user(user_name)
    new_owner = catalog_resource.Owner
    new_owner.User.set('href', user_resource.get('href'))
    objectify.deannotate(new_owner)
    return self.client.put_resource(catalog_href, new_owner,
                                    EntityType.OWNER.value)
def new_chart(data, headings_xlsx, rId):
    headings = [[]]
    for i, e in enumerate(headings_xlsx):
        headings[i].append(headings_xlsx[i])
        if i == len(headings_xlsx) - 1:
            continue
        else:
            headings.append([])
    chartFrame = CT_Chart_Container.new_chart_wrapper(rId)
    # set type of contained graphic to table
    chartData = chartFrame['chart'].plotArea
    # add chart data element tree
    chart = CT_ChartCell.new_chart(data, headings, headings_xlsx)
    chartData.append(chart)
    chart = CT_plotArea.new_plot_vars()
    chartData.append(chart)
    chart = CT_plotArea.plot_val_vars()
    chartData.append(chart)
    objectify.deannotate(chartData, cleanup_namespaces=True)
    chartXML = etree.tostring(chartFrame, pretty_print=True, xml_declaration=True,
                              encoding='UTF-8', standalone="yes")
    return chartFrame
def parse_xml(self):
    root = self.xml_tree.getroot()
    objectify.deannotate(root, cleanup_namespaces=True)
    for class_name in self.schema_classes:
        print class_name
        # exit()
        self.parse_xml_element(root, None, None)
def add_entry(self, path, title, username="", password="", url="", notes=""):
    # handle groups - check if exists and if not, create group
    xpathstring = '/KeePassFile/Root/Group[Name="Root"]'
    deepest_group = 0
    # search if group path exists, if not, create it
    groups = path.split("/")
    for group in groups:
        # group = groups[i]
        # FIXME: use .find()
        prev_xpath_element = self.database.obj_root.xpath(xpathstring)
        xpathstring = '%s/Group[Name="%s"]' % (xpathstring, group)
        group_el = self.database.obj_root.xpath(xpathstring)
        if not group_el:
            if prev_xpath_element:
                new_group_el = etree.SubElement(prev_xpath_element[0], "Group")
                new_group_el.Name = group
                new_group_el.UUID = base64.b64encode(uuid.uuid4().bytes)
                deepest_group = new_group_el
        else:
            deepest_group = group_el[0]
    # create entry
    entry = etree.SubElement(deepest_group, "Entry")
    entry.UUID = base64.b64encode(uuid.uuid4().bytes)
    self._entry_el_add_stringattribute(entry, "Title", title.decode("utf-8"))
    self._entry_el_add_stringattribute(entry, "UserName", username.decode("utf-8"))
    self._entry_el_add_stringattribute(entry, "Password", password.decode("utf-8"))
    self._entry_el_add_stringattribute(entry, "URL", url.decode("utf-8"))
    objectify.deannotate(self.database.obj_root, cleanup_namespaces=True)
    self._save()
def run(self, data_element):
    objectify.deannotate(data_element)
    etree.cleanup_namespaces(data_element)
    rpr_text = etree.tostring(data_element, pretty_print=True, encoding=text_type)
    self._stream.write(rpr_text)
def save(self, fileName=None):
    '''Save this POM to a file.

    If the file already exists, a backup is created.'''
    if not fileName:
        fileName = self.pomFile
    tmp = '%s.tmp' % fileName
    bak = '%s.bak' % fileName
    dir = os.path.dirname(tmp)
    if not os.path.exists(dir):
        os.makedirs(dir)
    if os.path.exists(tmp):
        os.remove(tmp)
    objectify.deannotate(self.xml)
    etree.cleanup_namespaces(self.xml)
    deps = getattr(self.project, 'dependencies', None)
    if deps is not None and len(deps) == 0:
        self.project.dependencies.remove()
    self.xml.write(tmp, encoding="UTF-8", pretty_print=True)
    if os.path.exists(fileName):
        os.rename(fileName, bak)
    os.rename(tmp, fileName)
def oxml_tostring(elm, encoding=None, pretty_print=False, standalone=None):
    # if xsi parameter is not set to False, PowerPoint won't load without a
    # repair step; deannotate removes some original xsi:type tags in core.xml
    # if this parameter is left out (or set to True)
    objectify.deannotate(elm, xsi=False, cleanup_namespaces=True)
    return etree.tostring(elm, encoding=encoding, pretty_print=pretty_print,
                          standalone=standalone)
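# Hedged usage sketch: assumes oxml_tostring() above is importable; the element
# and namespace below are illustrative only, not taken from the original source.
from lxml import objectify

elm = objectify.fromstring(
    '<p:sp xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"/>')
xml_bytes = oxml_tostring(elm, encoding='UTF-8', standalone=True)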
def new_placeholder_sp(id_, name, ph_type, orient, sz, idx):
    """
    Return a new ``<p:sp>`` element tree configured as a placeholder shape.
    """
    xml = CT_Shape._ph_sp_tmpl % (id_, name)
    sp = oxml_fromstring(xml)
    # placeholder shapes get a "no group" lock
    _SubElement(sp.nvSpPr.cNvSpPr, 'a:spLocks')
    sp.nvSpPr.cNvSpPr[qn('a:spLocks')].set('noGrp', '1')
    # placeholder (ph) element attribute values vary by type
    ph = _SubElement(sp.nvSpPr.nvPr, 'p:ph')
    if ph_type != PH_TYPE_OBJ:
        ph.set('type', ph_type)
    if orient != PH_ORIENT_HORZ:
        ph.set('orient', orient)
    if sz != PH_SZ_FULL:
        ph.set('sz', sz)
    if idx != 0:
        ph.set('idx', str(idx))
    placeholder_types_that_have_a_text_frame = (
        PH_TYPE_TITLE, PH_TYPE_CTRTITLE, PH_TYPE_SUBTITLE, PH_TYPE_BODY,
        PH_TYPE_OBJ)
    if ph_type in placeholder_types_that_have_a_text_frame:
        sp.append(CT_TextBody.new_txBody())
    objectify.deannotate(sp, cleanup_namespaces=True)
    return sp
def serialize_part_xml(part_elm):
    # if xsi parameter is not set to False, PowerPoint won't load without a
    # repair step; deannotate removes some original xsi:type tags in core.xml
    # if this parameter is left out (or set to True)
    objectify.deannotate(part_elm, xsi=False, cleanup_namespaces=False)
    xml = etree.tostring(part_elm, encoding='UTF-8', standalone=True)
    return xml
def save(self, filename=None):
    """Save the represented language database to the specified file.

    @type  filename: basestring
    @param filename: The path and name of the file to store the language
        database in."""
    if filename is None and not self.filename is None:
        filename = self.filename
    if filename is None:
        raise IOError('No filename given!')

    # FIXME (Bug #423): Find a way to make deannotate() below actually remove
    # those annoying pytype and xsi attribs.
    objectify.deannotate(self.xmlroot)

    # Make sure that we can successfully create the XML text before we open
    # (and possibly truncate) the file.
    try:
        xmlstring = etree.tostring(
            self.xmlroot, pretty_print=True, xml_declaration=True,
            encoding='utf-8')
    except Exception, exc:
        raise exc
def scan_module(module_path):
    try:
        pom = open(module_path, 'r')
        xml = etree.XML(pom.read().encode())
        objectify.deannotate(xml, cleanup_namespaces=True)
        pom.close()
        m = dict(xml.nsmap)
        namespace_prefix = ''
        if None in m:
            namespace_prefix = 'd:'
            m['d'] = m[None]
            del m[None]

        # extracts from a pom xml tree the data of interest
        ex = lambda e: (
            # module id
            e.findtext("%sgroupId" % namespace_prefix, namespaces=m)
            or e.findtext("%sparent/%sgroupId" % (namespace_prefix, namespace_prefix), namespaces=m),
            e.findtext("%sartifactId" % namespace_prefix, namespaces=m),
            # module type
            e.findtext("%spackaging" % namespace_prefix, namespaces=m),
            # dependencies
            list(ex(d) for d in e.xpath("%sdependencies/*" % namespace_prefix, namespaces=m))
            + list(ex(p) for p in e.xpath("%sparent" % namespace_prefix, namespaces=m))
        )

        group, artifact, packaging, dependencies = ex(xml)
        return group, artifact, packaging, dependencies, os.path.dirname(module_path)
    except lxml.etree.XMLSyntaxError:
        print("WARNING: %s failed to be parsed, this project will be ignored" % module_path)
        raise ValueError("%s does not have a valid POM file" % module_path)
def convert(self):
    import codecs
    outfile = codecs.open(self._xmlFn, 'w', 'utf-8')
    outfile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    objectify.deannotate(self._root, xsi_nil=True, cleanup_namespaces=True)
    outstr = etree.tostring(self._root, pretty_print=True)
    outstr = outstr.replace('-1C', '-2A')
    outstr = outstr.replace('<TILE_ID', '<TILE_ID_2A')
    outstr = outstr.replace('<DATASTRIP_ID', '<DATASTRIP_ID_2A')
    outstr = outstr.replace('<Product_Info', '<L2A_Product_Info')
    outstr = outstr.replace('<Product_Organisation', '<L2A_Product_Organisation')
    outstr = outstr.replace('<Product_Image_Characteristics', '<L2A_Product_Image_Characteristics')
    outstr = outstr.replace('<Pixel_Level_QI', '<L1C_Pixel_Level_QI')
    outstr = outstr.replace('</TILE_ID', '</TILE_ID_2A')
    outstr = outstr.replace('</DATASTRIP_ID', '</DATASTRIP_ID_2A')
    outstr = outstr.replace('</Product_Info', '</L2A_Product_Info')
    outstr = outstr.replace('</Product_Organisation', '</L2A_Product_Organisation')
    outstr = outstr.replace('</Product_Image_Characteristics', '</L2A_Product_Image_Characteristics')
    outstr = outstr.replace('</Pixel_Level_QI', '</L1C_Pixel_Level_QI')
    if self._product == 'T2A':
        outstr = outstr.replace('Image_Content_QI>', 'L1C_Image_Content_QI>')
    if self._product == 'UP2A':
        outstr = outstr.replace('QUANTIFICATION_VALUE', 'L1C_L2A_Quantification_Values_List')
        outstr = outstr.replace('</n1:Auxiliary_Data_Info>',
                                '</n1:Auxiliary_Data_Info>\n<n1:L2A_Auxiliary_Data_Info/>')
        outstr = outstr.replace('</n1:Quality_Indicators_Info>',
                                '</n1:Quality_Indicators_Info>\n<n1:L2A_Quality_Indicators_Info/>')
    outfile.write(outstr)
    outfile.close()
    return self.setRoot()
def _get_metadata(self, query, category, crossref=False):
    """
    Retrieve metadata from all search results and store in dictionary of elements.

    Positional arguments:
    query (str) -- the initial query string for the given search results.
    category (str) -- the container in which to store the gathered metadata.
        See self.metadata_collection template in __init__.
    """
    # It seems results are returned in a totally different format depending on
    # search client used.
    # The 'premium' client ("Search") returns XML of matching records.
    self.crossref = crossref
    if self.client == "Search":
        self.tree = etree.fromstring(self.search_results.records)
        objectify.deannotate(self.tree, cleanup_namespaces=True)
        for record in self.tree:
            self.meta_record = MetaWos(record, query)
            article_metadata = self.meta_record.compile_metadata()
            if self.citing_metadata:
                article_metadata["source_id"] = self.uid
            self.metadata_collection[category].append(article_metadata)
    # The 'lite' client returns a list of dictionary-like objects
    elif self.client == "Lite":
        for record in self.search_results.records:
            article_metadata = dict(record)
            if self.crossref:
                abstract = CrossRef.get_abstract(article_metadata)
            self.metadata_collection[category].append(article_metadata)
    else:
        print "Inappropriate method for metadata retrieval: {0}".format(self.client)
def new_chart_wrapper(rId):
    xml = CT_Chart_Container._chart_tmpl % rId
    graphicFrame = oxml_fromstring(xml)
    objectify.deannotate(graphicFrame, cleanup_namespaces=True)
    return graphicFrame
def getWaterLevelRawSixMinuteDataExt(self, beginDate, endDate, station, datum='MLLW', unit='feet', shift='GMT'):
    if self.logger:
        self.logger.debug("SOAP WSDL: %s" % (self.baseUrl))
    soapClient = Client(self.baseUrl, retxml=True)
    if unit == 'feet':
        unit = 1
    else:
        unit = 2
    if shift == 'GMT':
        shift = 0
    else:
        shift = 1
    ret_xml = soapClient.service.getWaterLevelRawSixMin(station, beginDate, endDate, datum, unit, shift)
    if self.logger:
        self.logger.debug(ret_xml)
    parser = XMLParser(remove_blank_text=True, huge_tree=True)
    parser.set_element_class_lookup(objectify.ObjectifyElementClassLookup())
    objectify.set_default_parser(parser)
    root = objectify.fromstring(ret_xml)
    objectify.deannotate(root, cleanup_namespaces=True)
    return root
def import_metadata(self, destination, model_year):
    x = xp.XMLParser(self.input_file)
    y = copy.deepcopy(x)
    y.root.idinfo.timeperd.timeinfo.sngdate.caldate = model_year
    objectify.deannotate(y.tree)
    etree.cleanup_namespaces(y.tree)
    output_file = destination + '/metadata.xml'
    y.tree.write(output_file, pretty_print=True)
def strip_namespaces(root):
    for elem in root.getiterator():
        if not hasattr(elem.tag, 'find'):
            continue  # (1)
        i = elem.tag.find('}')
        if i >= 0:
            elem.tag = elem.tag[i+1:]
    objectify.deannotate(root, cleanup_namespaces=True)
    return root
def new():
    """
    Return a new ``<Types>`` element.
    """
    xml = '<Types xmlns="%s"/>' % nsmap['ct']
    types = oxml_fromstring(xml)
    objectify.deannotate(types, cleanup_namespaces=True)
    return types
def build_xmlrequest(request_attributes={}, subelements=[]):
    """Build an XML request string"""
    request_element = build_basic_element('request', request_attributes, subelements)
    root = build_basic_element('tellervo', subelements=[request_element])
    objectify.deannotate(root, xsi_nil=True)
    etree.cleanup_namespaces(root)
    return etree.tostring(root)
        continue

##### Strip out namespaces in root element #########################
for elem in root.getiterator():
    if not hasattr(elem.tag, 'find'):
        custom_logger(filePath + ': strip out namespaces in root element | if not hasattr(elem.tag, find): --> ')
        continue
    i = elem.tag.find('}')
    if i >= 0:
        elem.tag = elem.tag[i + 1:]
objectify.deannotate(root, cleanup_namespaces=True)

##### Party and Address info ######################################
with open(filePath, 'r') as file:
    filedata = file.read()
    file.close()

realCaseID = xml_retrieve('.//CaseDefinition', 'CaseID')  # also CaseDefinition ExternalID
realFirstName = xml_retrieve('.//PartyDefinition', 'FirstName')
realMiddleName = xml_retrieve('.//PartyDefinition', 'MiddleName')
realLastName = xml_retrieve('.//PartyDefinition', 'LastName')
realFullName = xml_retrieve('.//PartyDefinition', 'FullName')
realBirthDate = xml_retrieve('.//PartyDefinition', 'BirthDate')
def collect_features(browser, debugfile=None): ''' Read DOM attributes from the current page loaded by the browser, derive page features Data dictionary of collect_features() output: id [int] seq num of node in JS dom tree parent [int] id of parent node tagname [str] HTML tag name depth [int] node count to its deepest descendent in dom tree (etree-based) childcount [int] num of children sourceline [int] line num of source code (etree-based, i.e. start from <body> tag) sourcepct [float] percentage position of source line in HTML (etree-based, within <body>) pospct [float] percentage postiion of node in the DOM (depth-first search of JS DOM) xpct [float] percentage position of element's left edge to window width x [int] pixel coordinate of left edge of element's bounding box to the page y [int] pixel coordinate of top edge of element's bounding box to the page width [int] pixel width of element's bounding box height [int] pixel height of element's bounding box fgcolor [str] foreground color, in form of rgb(255,255,255) or rgba(255,255,255,1.0) bgcolor [str] background color, in form of rgb(255,255,255) or rgba(255,255,255,1.0) textxws [int] character length of text excluding whitespaces textlen [int] character length of text htmllen [int] character length of HTML code visible [bool] visibility of this element fontsize [float] font size xpath [str] xpath of element textclip [str] starting and ending snippet of text goodness [bool] is this part of main content ''' dom = browser.getDOMdata(True) # synchronous get winparam = browser.windowParams winwidth = winparam['innerWidth'] logger.debug("%d web elements found" % len(dom)) page_source = next((x[-1] for x in dom if x[0]=='/html/body'),'') assert(page_source) # we assumed there must be a body domtree = html2dom(page_source) # need to pretty format source before use objectify.deannotate(domtree, cleanup_namespaces=True) linecount = len(page_source.split("\n")) if debugfile: open(debugfile,'w').write(etree.tostring(domtree, encoding='utf8', pretty_print=True, method='xml')) # populate DOM tree geometry data xpathHash = {attrs[0]:i for i,attrs in enumerate(dom)} depthHash = {} # actually "height", distance from node to deepest leaf, based on lxml etree def findElementDepth(e): "e: lxml etree element node, find its depth in dom tree" if e not in depthHash: if len(e): # e has children depthHash[e] = 1 + max(findElementDepth(x) for x in e.iterchildren()) else: # e has no children, by definition depth=0 depthHash[e] = 0 return depthHash[e] # collect element attributes: attributes = [] try: # for pages we know where are the main body content_xpaths = get_content_xpaths(winparam.addr, domtree) except NotImplementedError: logger.critical('No content identifier for URL %s' % browser.current_url) content_xpaths = [] for i,attrs in enumerate(dom): if i and (i % 1000 == 0): logger.debug('...on element #%d' % i) xpath, display, visible, x, y, width, height, fgcolor, bgcolor, fontsize, textonly, htmlcode = attrs if not xpath or re.search(r'[^a-z0-9\[\]\/]',xpath) or re.search(r'(?<!\w)(script|head)(?!\w)',xpath): continue # skip these to avoid pollution by JS or HTML header etreenode = domtree.xpath(xpath) if len(etreenode) != 1: if not etreenode: logger.error('WebDriver reported XPath cannot be found in lxml: %s' % xpath) continue else: logger.error('XPath not unique for %s. %d elements found.' 
% (xpath, len(etreenode))) parent = xpathHash.get(xpath.rsplit('/',1)[0]) tagname = xpath.rsplit('/',1)[-1].split('[',1)[0] depth = findElementDepth(etreenode[0]) if etreenode: childcount = len(etreenode) else: childcount = len(n for n in xpathHash if n.startwith(xpath) and '/' not in n[len(xpath):]) sourceline = etreenode[0].sourceline fgcolor = fgcolor.replace(' ','') bgcolor = bgcolor.replace(' ','') textonly = condense_space(textonly) # text from JS retains word boundary by replacing tag with space while etree.tostring() just remove tags htmlcode = condense_space(htmlcode) if not htmlcode: # JS cannot give out the HTML, use etree version instead htmlcode = condense_space(etree.tostring(etreenode[0], encoding='utf8', method='html').decode('utf8')) # derived data textlen, htmllen = len(textonly), len(htmlcode) textxws = sum(1 for c in textonly if c and not c.isspace()) # text length excluding whitespaces if not htmllen: logger.error('empty HTML for tag %s on line %s at (%s,%s)+(%s,%s)' % (tagname, sourceline, x,y,width,height)) textclip = abbreviate(textonly) sourcepct = float(sourceline)/linecount xpct = float(x)/winwidth pospct = float(i+1)/len(dom) isgood = -1 if not content_xpaths else 1 if visible and display and xpath in content_xpaths else 0 # remember this attributes.append([i, parent, tagname, depth, childcount, sourceline, sourcepct, pospct, xpct, x, y, width, height, fgcolor, bgcolor, textxws, textlen, htmllen, min(visible,display), fontsize, xpath, textclip, isgood]) header = ("id parent tagname depth childcount sourceline sourcepct pospct xpct x y width height " "fgcolor bgcolor textxws textlen htmllen visible fontsize xpath textclip goodness").split() return header, attributes
def normalize(self, mt_string):
    mt_tree = etree.fromstring(mt_string)
    self.__remove_attributes_tree(mt_tree)
    objectify.deannotate(mt_tree, cleanup_namespaces=True)
    return etree.tostring(mt_tree)
def create_xml(p_component_id, p_full_name, p_plain_name, p_code, p_stage, p_verid,
               p_operation_code, p_operation_name, p_tool_ext_id):
    # Build the XML document.
    xml = '''<SOAP:Envelope xmlns:SOAP='http://schemas.xmlsoap.org/soap/envelope/'>
    <SOAP:Header/>
    <SOAP:Body>
        <ProductionOrderRequest xmlns:prx='urn:sap.com:proxy:EPR:/1SAI/TAS6CA1BE8F0794C74DF05D:731'>
            <HeaderSection>
                <UUID>{0}</UUID>
                <Sender>KSAUPKK_ERP</Sender>
                <Recipients>SUIKP</Recipients>
                <CreationDateTime>{1}</CreationDateTime>
            </HeaderSection>
            <DataSection>
            </DataSection>
        </ProductionOrderRequest>
    </SOAP:Body>
</SOAP:Envelope>
'''.format(u.uuid1(), "%sZ" % datetime.datetime.now().isoformat(timespec="seconds"))
    root = objectify.fromstring(xml)
    nav = root.find(".//DataSection")
    # fill in the data
    order_name = "T{0}-{1}".format(datetime.datetime.now().strftime("%y%m%d"), randint(1, 999))
    sector_hex = "{0:x}".format(int(c_sector))
    item = create_appt({
        "AUFNR": order_name,
        "AUART": "P%s" % c_manufacture,
        "GSTRS": datetime.datetime.now().strftime("%Y-%m-%d"),
        "GLTRS": datetime.datetime.now().strftime("%Y-%m-%d"),
        "LGORT": "{0}{1}".format(c_manufacture, sector_hex),
        "PLNBEZ": p_component_id,
        "MATXT": p_full_name,
        "HF_DSE_NAME": p_plain_name,
        "HF_DSE_KTD": p_code,
        "HF_DSE_NAME_H": p_stage,
        "BMENGE": c_amount,
        "BMEINS": "ST",
        "IGMNG": "0.0",
        "APRIO": "",
    })
    nav.append(item)
    # production variant of the part (DSE)
    eiafpol = create_e1afpol({"VERID": p_verid})
    nav = root.find(".//APRIO")
    nav.addnext(eiafpol)
    # order status
    e1jstkl = create_e1jstkl({"STAT": "I0002"})
    nav = root.find(".//E1AFPOL")
    nav.addnext(e1jstkl)
    # order operation
    e1afvol = create_e1afvol({
        "VORNR": p_operation_code,
        "LTXA1": p_operation_name,
        "ARBID": p_tool_ext_id,
        "MGVRG": c_amount,
        "MEINH": "ST",
        "LMNGA": "0.0",
    })
    nav = root.find(".//E1JSTKL")
    nav.addnext(e1afvol)
    # remove all lxml annotations.
    objectify.deannotate(root)
    # remove redundant namespaces.
    nav = root.find(".//Item")
    etree.cleanup_namespaces(nav)
    obj_xml = etree.tostring(root, encoding="UTF-8", pretty_print=True, xml_declaration=True)
    order_file_name = "Order_%s.xml" % datetime.datetime.now().isoformat()
    try:
        with open(order_file_name, "wb") as xml_writer:
            xml_writer.write(obj_xml)
    except IOError:
        pass
    print("XML file {0} created".format(order_file_name))
    return order_file_name
def __setitem__(self, key, value):
    """Set value for attr key."""
    attr = setattr(self.root, key, value)
    objectify.deannotate(self.root)
    return attr
def __getitem__(self, key):
    """Return the value for attr key."""
    attr = getattr(self.root, key, None)
    objectify.deannotate(self.root)
    return attr
def update_final_report(config, criteria, stations, station, time, status = None, comment = None, results = None, score = None, correction = None): """ Ažurira finalni izveštaj sa pojedinačnim elementom ocene zadatka Mehanizam rada je takav da će menjati samo jedan <assignment> nod u XML fajlu i to onaj koji se tiče zadatka kojem se prijavljuju izmene u rezultatu. Ostatak sadržaja neće biti učitavan i menjan. config - globalna konfiguracija alata za pregled criteria - kriterijum pregleda zadatka (bodovanje, način izvršavanja itd.) stations - kolekcija računara i studenata koji su radili zadatak (ključ - oznaka računara, podatak - lista - broj indeksa i ime/prezime studenta) station - oznaka računara na kojem je zadatak urađen time - timestamp poslednje izmene na rezultatima status - status celog zadatka comment - komentar pregledača na zadatak results - lista tuple objekata TestResults - rezultat izvršenja testova score - procentualni učinak studenta na zadatku correction - korekcija koju je pregledač uveo """ print 'Azuriranje XML fajla sa izvestajem sa ispita: "{0}"'.format(config.FINAL_REPORT_FILENAME) try: criteria.total_points # Provera da li je definisana varijabla except NameError: criteria.total_points = 100 total_points_f = float(criteria.total_points) if not (results is None): total = 0.0 for r in results: if r.success: total += r.score # Ako fajl ne postoji, kreiraj korenski element: if not(path.isfile(config.FINAL_REPORT_FILENAME)): root = objectify.Element('assignments') else: with open(config.FINAL_REPORT_FILENAME) as f: xml = f.read() root = objectify.fromstring(xml) # Probaj pronaci <assignment> tag koji se odnosi na zadatu stanicu: assign = None for a in root.getchildren(): if a.get('station') == station: assign = a # Ako prethodno ne postoji takav <assignment>, kreiraj novi: if assign == None: assign = objectify.SubElement(root, 'assignment') # Podesi nove podatke: assign.set('station', station) assign['id'] = stations[station][0] assign['name'] = stations[station][1] # Podesi nove podatke: assign.time = time if not (results is None): assign['test-score'] = '{:.2f}'.format(total) if not (comment is None): assign['comment'] = comment if not (score is None): assign['direct-score'] = '{:.2f}'.format(score) assign['final-pct'] = '{:.2f}'.format(score) if not (correction is None): # Ako je zadata korekcija 0 - onda se korekcija ukida: if correction == 0: sub = assign.find('correction') if (sub is not None): assign.remove(sub) else: assign['correction'] = '{:+.2f}'.format(correction) final_number = 0 reason = u'nema uspešnih testova' if assign.find('test-score') is not None: final_number = float(assign['test-score']) reason = u'{:+.2f}% na uspešne testove'.format(final_number) if correction != 0: final_number = final_number + correction reason = reason + u' i {:+.2f}% korekcija'.format(correction) final = '{:.2f}'.format(final_number) points = int(round(final_number * (total_points_f / 100.0), 0)) assign['final-pct'] = final assign['final-points'] = points assign['reason'] = reason if not (results is None): # Ako je bilo direktno zadatog rezultata, nakon što su izvršeni automatski testovi, taj skor se briše: sub = assign.find('direct-score') if (sub is not None): assign.remove(sub) while True: sub = assign.find('tests') if (sub is None): break; assign.remove(sub) tests_root = objectify.SubElement(assign, 'tests') for r in results: test = objectify.SubElement(tests_root, 'test') test.attrib['name'] = r.name test['runs'] = r.runs test['passes'] = r.passes test['failures'] = 
r.failures test['test-fails'] = r.test_fails test['crashes'] = r.crashes test['time-outs'] = r.time_outs test['total-duration'] = '{:.2f}'.format(r.total_duration) test['max-duration'] = '{:.2f}'.format(r.max_duration) test['success'] = r.success test['score'] = '{:.2f}'.format(r.score) test['factor'] = r.factor executions = objectify.SubElement(test, 'executions') for e in r.executions: elem = objectify.SubElement(executions, 'passed') elem._setText(str(e).lower()) # Logika za odredjivanje broja poena, u odnosu na status rada i ostale parametre: if not (status is None): assign['status'] = status if status == ASSIGNMENT_STATUS_BLOCKED: final = '0' points = 0 reason = u'makar jedan od blokirajućih testova ne prolazi' elif status == ASSIGNMENT_STATUS_FAILS_TO_COMPILE: final = '0' points = 0 reason = u'projekat se ne kompajlira uspešno' elif status == ASSIGNMENT_STATUS_DIRECTLY_RATED: final = assign['direct-score'] final_number = float(assign['direct-score']) points = int(round(final_number * (total_points_f / 100.0), 0)) reason = u'direktno zadata ocena' elif status == ASSIGNMENT_STATUS_OK: final_number = float(assign['test-score']) reason = u'{0}% na uspešne testove'.format(final_number) sub = assign.find('correction') if (sub is not None): corr = float(assign['correction']) final_number = final_number + corr reason = reason + u' i {:+.2f}% korekcija'.format(corr) final = '{:.2f}'.format(final_number) points = int(round(final_number * (total_points_f / 100.0), 0)) elif status == ASSIGNMENT_STATUS_SKIPPED: final_number = 0 points = 0 final = '{:.2f}'.format(final_number) reason = u'rad je preskočen' else: util.fatal_error('''Interna greska: status "{0}" nema definisana pravila za bodovanje! Kontaktirati autora programa.'''.format(status)) assign['final-pct'] = final assign['final-points'] = points assign['reason'] = reason # Upiši izmenjen fajl: f = open(config.FINAL_REPORT_FILENAME, 'w') objectify.deannotate(root) # Skidanje objectify anotacija # Dodaje se XML zaglavlje u kojem se navodi UTF-8 kao upotrebljeno enkodiranje i referencira se XSLT dokument: f.write('<?xml version="1.0" encoding="UTF-8"?>\n<?xml-stylesheet type="text/xsl" href="{0}"?>\n' .format(config.FINAL_REPORT_XSLT_FILENAME)) f.write(etree.tostring(root, xml_declaration=False, encoding='utf-8', pretty_print=True)) f.close()
def export(self, property_state): """ Export HPXML file from an existing HPXML file (from import) merging in the data from property_state :param property_state: object, PropertyState to merge into HPXMLs :return: string, as XML """ if not property_state: f = BytesIO() self.tree.write(f, encoding='utf-8', pretty_print=True, xml_declaration=True) return f.getvalue() if self.tree is None: tree = objectify.parse(os.path.join(here, 'schemas', 'blank.xml'), parser=hpxml_parser) root = tree.getroot() else: root = deepcopy(self.root) bldg = self._get_building( property_state.extra_data.get('hpxml_building_id'), start_from=root) for pskey, xml_loc in self.HPXML_STRUCT.items(): value = getattr(property_state, pskey) el = self.xpath(xml_loc['path'], start_from=bldg, only_one=True) if pskey == 'energy_score': continue if value is None and self.tree is None: el.getparent().remove(el) if value is None or el is None: continue # set the value to magnitude if it is a quantity if isinstance(value, ureg.Quantity): value = value.magnitude setattr(el.getparent(), el.tag[el.tag.index('}') + 1:], str(value) if not isinstance(value, basestring) else value) E = objectify.ElementMaker(annotate=False, namespace=self.NS, nsmap={None: self.NS}) # Owner Information owner = self.xpath(( '//h:Customer/h:CustomerDetails/h:Person' '[not(h:IndividualType) or h:IndividualType = "owner-occupant" or h:IndividualType = "owner-non-occupant"]' ), start_from=root) if len(owner) > 0: owner = owner[0] else: customer = E.Customer( E.CustomerDetails( E.Person(E.SystemIdentifier(id='person1'), E.Name()))) root.Building.addprevious(customer) owner = customer.CustomerDetails.Person # Owner Name if property_state.owner is not None: try: owner_name, name_type = pp.tag(property_state.owner, type='person') except pp.RepeatedLabelError: pass else: if name_type.lower() == 'person': owner.Name.clear() if 'PrefixMarital' in owner_name or 'PrefixOther' in owner_name: owner.Name.append( E.PrefixName(' '.join([ owner_name.get('Prefix' + x, '') for x in ('Marital', 'Other') ]).strip())) if 'GivenName' in owner_name: owner.Name.append(E.FirstName(owner_name['GivenName'])) elif 'FirstInitial' in owner_name: owner.Name.append( E.FirstName(owner_name['FirstInitial'])) else: owner.Name.append(E.FirstName()) if 'MiddleName' in owner_name: owner.Name.append( E.MiddleName(owner_name['MiddleName'])) elif 'MiddleInitial' in owner_name: owner.Name.append( E.MiddleName(owner_name['MiddleInitial'])) if 'Surname' in owner_name: owner.Name.append(E.LastName(owner_name['Surname'])) elif 'LastInitial' in owner_name: owner.Name.append(E.LastName( owner_name['LastInitial'])) else: owner.Name.append(E.LastName()) if 'SuffixGenerational' in owner_name or 'SuffixOther' in owner_name: owner.Name.append( E.SuffixName(' '.join([ owner_name.get('Suffix' + x, '') for x in ('Generational', 'Other') ]).strip())) # Owner Email if property_state.owner_email is not None: new_email = E.Email(E.EmailAddress(property_state.owner_email), E.PreferredContactMethod(True)) if hasattr(owner, 'Email'): if property_state.owner_email not in owner.Email: owner.append(new_email) else: owner.append(new_email) # Owner Telephone if property_state.owner_telephone is not None: insert_phone_number = False if hasattr(owner, 'Telephone'): if property_state.owner_telephone not in owner.Telephone: insert_phone_number = True else: insert_phone_number = True if insert_phone_number: new_phone = E.Telephone( E.TelephoneNumber(property_state.owner_telephone), E.PreferredContactMethod(True)) 
inserted_phone_number = False for elname in ('Email', 'extension'): if hasattr(owner, elname): getattr(owner, elname).addprevious(new_phone) inserted_phone_number = True break if not inserted_phone_number: owner.append(new_phone) # Owner Address try: address = owner.getparent().MailingAddress except AttributeError: owner.getparent().Person[-1].addnext(E.MailingAddress()) address = owner.getparent().MailingAddress address.clear() if property_state.owner_address is not None: address.append(E.Address1(property_state.owner_address)) if property_state.owner_city_state is not None: city_state, _ = usadd.tag(property_state.owner_city_state) address.append(E.CityMunicipality(city_state.get('PlaceName', ''))) address.append(E.StateCode(city_state.get('StateName', ''))) if property_state.owner_postal_code is not None: address.append(E.ZipCode(property_state.owner_postal_code)) # Building Certification / Program Certificate program_certificate_options = [ 'Home Performance with Energy Star', 'LEED Certified', 'LEED Silver', 'LEED Gold', 'LEED Platinum', 'other' ] if property_state.building_certification is not None: try: root.Project except AttributeError: root.Building[-1].addnext( E.Project( E.BuildingID(id=bldg.BuildingID.get('id')), E.ProjectDetails( E.ProjectSystemIdentifiers( id=bldg.BuildingID.get('id'))))) new_prog_cert = E.ProgramCertificate( property_state.building_certification if property_state. building_certification in program_certificate_options else 'other') try: root.Project.ProjectDetails.ProgramCertificate except AttributeError: for elname in ('YearCertified', 'CertifyingOrganizationURL', 'CertifyingOrganization', 'ProgramSponsor', 'ContractorSystemIdentifiers', 'ProgramName', 'ProjectSystemIdentifiers'): if hasattr(root.Project.ProjectDetails, elname): getattr(root.Project.ProjectDetails, elname).addnext(new_prog_cert) break else: if property_state.building_certification not in root.Project.ProjectDetails.ProgramCertificate: root.Project.ProjectDetails.ProgramCertificate[-1].addnext( new_prog_cert) # Energy Score energy_score_type_options = ['US DOE Home Energy Score', 'RESNET HERS'] bldg_const = bldg.BuildingDetails.BuildingSummary.BuildingConstruction if property_state.energy_score: energy_score_type = property_state.extra_data.get( 'energy_score_type') try: found_energy_score = False for energy_score_el in bldg_const.EnergyScore: if energy_score_type in (energy_score_el.ScoreType, getattr(energy_score_el, 'OtherScoreType', None)): found_energy_score = True break if not found_energy_score: energy_score_el = E.EnergyScore() bldg_const.EnergyScore[-1].addnext(energy_score_el) except AttributeError: energy_score_el = E.EnergyScore() try: bldg_const.extension.addprevious(energy_score_el) except AttributeError: bldg_const.append(energy_score_el) if energy_score_type in energy_score_type_options: energy_score_el.ScoreType = energy_score_type else: energy_score_el.ScoreType = 'other' energy_score_el.OtherScoreType = energy_score_type energy_score_el.Score = property_state.energy_score # Serialize tree = etree.ElementTree(root) objectify.deannotate(tree, cleanup_namespaces=True) f = BytesIO() tree.write(f, encoding='utf-8', pretty_print=True, xml_declaration=True) return f.getvalue()
def create_report_metadata(self): """ Create the XML file containing metadata to be written into the accuracy assessment report Parameters ---------- None Returns ------- None """ p = self.parameter_parser # Connect to the lemma web database web_db = web_database.WebDatabase(p.model_project, p.model_region, p.web_dsn) # Create the XML xml_schema_file = \ 'http://lemma.forestry.oregonstate.edu/xml/report_metadata.xsd' root_str = """ <report_metadata xmlns:xsi="%s" xsi:noNamespaceSchemaLocation="%s"/> """ root_str = root_str % ('http://www.w3.org/2001/XMLSchema-instance', xml_schema_file) #root_str = "<report_metadata/>" root_elem = objectify.fromstring(root_str) # Get the model region overview mr_overview = web_db.get_model_region_info() field_names = mr_overview.dtype.names overview_elem = etree.SubElement(root_elem, 'overview') for f in field_names: child = etree.SubElement(overview_elem, f.lower()) overview_elem[child.tag] = getattr(mr_overview[0], f) # Get contact info for people associated with this project people_info = web_db.get_people_info() field_names = people_info.dtype.names people_elem = etree.SubElement(root_elem, 'contact_information') for person in people_info: person_elem = etree.SubElement(people_elem, 'contact') for f in field_names: child = etree.SubElement(person_elem, f.lower()) person_elem[child.tag] = getattr(person, f) # Store list of plot IDs into a string if this variable hasn't # yet been created if not hasattr(self, 'id_str'): self.id_str = self._get_id_string() # Subset the string of plot IDs to thin to one plot at a # location just for locations that have the exact same spectral # values for all plot measurements (i.e. places where the # imagery has been stabilized delete_list = self.plot_db.get_duplicate_plots_to_remove(self.id_str) if len(delete_list) > 0: id_list_subset = [int(x) for x in self.id_str.split(",")] for id in delete_list: try: id_list_subset.remove(id) # if the ID is not in the list, go on to the next ID except ValueError: continue # turn subsetted id_list into a string id_str_subset = ','.join(map(str, id_list_subset)) else: id_str_subset = self.id_str # Get the plot data sources data_sources = self.plot_db.get_plot_data_source_summary(id_str_subset) field_names = data_sources.dtype.names data_sources_elem = etree.SubElement(root_elem, 'plot_data_sources') # Create subelements for each unique plot data source for ds in np.unique(data_sources.DATA_SOURCE): data_source_elem = \ etree.SubElement(data_sources_elem, 'plot_data_source') child = etree.SubElement(data_source_elem, 'data_source') data_source_elem[child.tag] = ds child = etree.SubElement(data_source_elem, 'description') descriptions = \ data_sources[np.where(data_sources.DATA_SOURCE == ds)] description = np.unique(descriptions) data_source_elem[child.tag] = description['DESCRIPTION'][0] years_elem = etree.SubElement(data_source_elem, 'assessment_years') recs = data_sources[np.where(data_sources.DATA_SOURCE == ds)] # Create subelements for each plot assessment years for # this data source for rec in recs: year_elem = etree.SubElement(years_elem, 'year') child = etree.SubElement(year_elem, 'assessment_year') year_elem[child.tag] = getattr(rec, 'ASSESSMENT_YEAR') child = etree.SubElement(year_elem, 'plot_count') year_elem[child.tag] = getattr(rec, 'PLOT_COUNT') # Get the species scientific and common names species_names = \ self.plot_db.get_species_names(self.id_str, p.lump_table) field_names = species_names.dtype.names species_names_elem = etree.SubElement(root_elem, 'species_names') for 
species_name in species_names: species_name_elem = etree.SubElement(species_names_elem, 'species') for f in field_names: child = etree.SubElement(species_name_elem, f.lower()) species_name_elem[child.tag] = getattr(species_name, f) # Get the ordination variable descriptions ordination_vars = ','.join(p.get_ordination_variable_names()) ordination_descr = \ self.plot_db.get_ordination_variable_descriptions(ordination_vars) field_names = ordination_descr.dtype.names ord_vars_elem = etree.SubElement(root_elem, 'ordination_variables') for ord_var in ordination_descr: ord_var_elem = \ etree.SubElement(ord_vars_elem, 'ordination_variable') for f in field_names: child = etree.SubElement(ord_var_elem, f.lower()) ord_var_elem[child.tag] = getattr(ord_var, f) tree = root_elem.getroottree() objectify.deannotate(tree) etree.cleanup_namespaces(tree) # Ensure that this tree validates against the schema file utilities.validate_xml(tree, xml_schema_file) # Write XML to file report_metadata_file = p.report_metadata_file aa_dir = os.path.dirname(report_metadata_file) if not os.path.exists(aa_dir): os.makedirs(aa_dir) tree.write(report_metadata_file, pretty_print=True)
def process(type, db, config): location = '%s/%s' % (config.ACT_DIR, type) count = 0 with db.cursor() as cur: parser = etree.XMLParser(resolve_entities=False, huge_tree=True) print location for dirpath, dirs, files in os.walk(location): files = [f for f in files if f.endswith('.xml')] if len(files): path = os.path.join(dirpath.replace(config.ACT_DIR + '/', ''), files[0]) try: print path tree = etree.parse(os.path.join(dirpath, files[0]), parser) objectify.deannotate(tree, cleanup_namespaces=True) for elem in tree.iter(): if not hasattr(elem.tag, 'find'): continue i = elem.tag.find('}') if i >= 0: elem.tag = elem.tag[i + 1:] attrib = tree.getroot().attrib if attrib.get('id'): title = etree.tostring( tree.xpath('.//billref|.//title')[0], method="text", encoding="UTF-8") #TODO title = title.replace('\n', '').strip() query = """INSERT INTO instruments (id, govt_id, version, title, path, number, date_as_at,date_assent, type, date_first_valid, date_gazetted, date_terminated, date_imprint, year, repealed, in_amend, pco_suffix, raised_by, official, subtype, terminated, stage, date_signed, imperial, instructing_office, attributes) VALUES (%(id)s, %(govt_id)s, %(version)s, %(title)s, %(path)s, %(number)s, %(date_as_at)s,%(date_assent)s, %(type)s, %(date_first_valid)s, %(date_gazetted)s, %(date_terminated)s, %(date_imprint)s, %(year)s, %(repealed)s, %(in_amend)s, %(pco_suffix)s, %(raised_by)s, %(official)s, %(subtype)s, %(terminated)s, %(stage)s, %(date_signed)s, %(imperial)s, %(instructing_office)s, %(attr)s); """ with open(os.path.join(dirpath, files[0])) as r: cur.execute( """ INSERT INTO documents (document, type) VALUES (%(document)s, 'xml') returning id""", {'document': r.read()}) document_id = cur.fetchone()[0] values = { 'id': document_id, 'govt_id': attrib.get('id'), 'title': title, 'version': int(float(dirpath.split('/')[-1])), 'path': path, 'number': attrib.get( 'sr.no', attrib.get( 'sop.no', attrib.get('act.no', attrib.get('bill.no')))), 'date_first_valid': safe_date(attrib.get('date.first.valid')), 'date_gazetted': safe_date(attrib.get('date.date_gazetted')), 'date_terminated': safe_date(attrib.get('date.terminated')), 'date_imprint': safe_date(attrib.get('date.imprint')), 'date_as_at': safe_date(attrib.get('date.as.at')), 'date_assent': safe_date(attrib.get('date.assent')), 'year': int(attrib.get('year')), 'repealed': attrib.get('terminated') == "repealed", 'in_amend': attrib.get('in.amend') != 'false', 'pco_suffix': attrib.get('pco.suffix'), 'raised_by': attrib.get('raised.by'), 'official': attrib.get('official'), 'type': type, 'subtype': attrib.get( 'act.type', attrib.get('sr.type', attrib.get('bill.type'))), 'terminated': attrib.get('terminated'), 'stage': attrib.get('stage'), 'date_signed': safe_date(attrib.get('date.signed')), 'imperial': attrib.get('imperial') == 'yes', 'instructing_office': attrib.get('instructing_office'), 'attr': json.dumps(dict(attrib)) } cur.execute(query, values) except etree.XMLSyntaxError, e: print 'ERROR', e, path
def __get_clean_mathml(mt_string):
    mt_tree = etree.parse(StringIO(__dtd + mt_string), __xml_parser).getroot()
    objectify.deannotate(mt_tree, cleanup_namespaces=True)
    return mt_tree
def do_export(self, filename, records): print('Choose password for your Keepass file') master_password = getpass.getpass(prompt='...' + 'Keepass Password'.rjust(20) + ': ', stream=None) sfs = [] # type: [SharedFolder] rs = [] # type: [Record] for x in records: if type(x) is Record: rs.append(x) elif type(x) is SharedFolder: sfs.append(x) template_file = os.path.join(os.path.dirname(__file__), 'template.kdbx') with libkeepass.open(template_file, password='******') as kdb: root = kdb.obj_root.Root.Group for sf in sfs: comps = list(path_components(sf.path)) node = root for i in range(len(comps)): comp = comps[i] sub_node = node.find('Group[Name=\'{0}\']'.format(comp)) if sub_node is None: sub_node = objectify.Element('Group') sub_node.UUID = base64.b64encode( os.urandom(16)).decode() sub_node.Name = comp node.append(sub_node) if i == len(comps) - 1: # store Keeper specific info keeper = sub_node.find('Keeper') if keeper is None: keeper = objectify.Element('Keeper') sub_node.append(keeper) else: keeper.clear() keeper.IsShared = True keeper.ManageUsers = sf.manage_users keeper.ManageRecords = sf.manage_records keeper.CanEdit = sf.can_edit keeper.CanShare = sf.can_share if sf.permissions: for perm in sf.permissions: permission = objectify.Element('Permission') if perm.uid: permission.UUID = base64.b64encode( base64.urlsafe_b64decode( perm.uid + '==')).decode() permission.Name = perm.name permission.ManageUsers = perm.manage_users permission.ManageRecords = perm.manage_records keeper.append(permission) node = sub_node for r in rs: try: node = kdb.obj_root.Root.Group fol = None if r.folders: fol = r.folders[0] for is_shared in [True, False]: path = fol.domain if is_shared else fol.path if path: comps = list(path_components(path)) for i in range(len(comps)): comp = comps[i] sub_node = node.find( 'Group[Name=\'{0}\']'.format(comp)) if sub_node is None: sub_node = objectify.Element('Group') sub_node.UUID = base64.b64encode( os.urandom(16)).decode() sub_node.Name = comp node.append(sub_node) node = sub_node entry = None entries = node.findall('Entry') if len(entries) > 0: for en in entries: title = '' login = '' password = '' if hasattr(en, 'String'): for sn in en.String: if hasattr(sn, 'Key') and hasattr( sn, 'Value'): key = sn.Key.text value = sn.Value.text if key == 'Title': title = value elif key == 'UserName': login = value elif key == 'Password': password = value if title == r.title and login == r.login and password == r.password: entry = node break strings = {'URL': r.login_url, 'Notes': r.notes} if r.custom_fields: for cf in r.custom_fields: strings[cf] = r.custom_fields[cf] if entry is None: entry = objectify.Element('Entry') if r.uid: entry.UUID = base64.b64encode( base64.urlsafe_b64decode(r.uid + '==')).decode() else: entry.UUID = base64.b64encode( os.urandom(16)).decode() node.append(entry) strings['Title'] = r.title, strings['UserName'] = r.login, strings['Password'] = r.password, else: for str_node in entry.findall('String'): if hasattr(str_node, 'Key'): key = str_node.Key if key in strings: value = strings[key] if value: str_node.Value = value strings.pop(key) if not fol is None: if fol.domain: keeper = entry.find('Keeper') if keeper is None: keeper = objectify.Element('Keeper') entry.append(keeper) else: keeper.clear() keeper.CanEdit = fol.can_edit keeper.CanShare = fol.can_share for f in r.folders[1:]: link = objectify.Element('Link') keeper.append(link) if f.domain: link.Domain = f.domain link.CanEdit = f.can_edit link.CanShare = f.can_share if f.path: link.Path = f.Path for key in 
strings: value = strings[key] if value: s_node = objectify.Element('String') s_node.Key = key s_node.Value = value entry.append(s_node) if r.attachments: for atta in r.attachments: max_size = 1024 * 1024 if atta.size < max_size: bins = None bId = 0 if hasattr(kdb.obj_root.Meta, 'Binaries'): bins = kdb.obj_root.Meta.Binaries elems = bins.findall('Binary') bId = len(elems) else: bins = objectify.Element('Binaries') kdb.obj_root.Meta.append(bins) bId = 0 with atta.open() as s: buffer = s.read(max_size) if len(buffer) >= 32: iv = buffer[:16] cipher = AES.new( atta.key, AES.MODE_CBC, iv) buffer = cipher.decrypt(buffer[16:]) if len(buffer) > 0: buffer = unpad_binary(buffer) out = io.BytesIO() with gzip.GzipFile(fileobj=out, mode='w') as gz: gz.write(buffer) bin = objectify.E.Binary( base64.b64encode( out.getvalue()).decode(), Compressed=str(True), ID=str(bId)) bins.append(bin) bin = objectify.Element('Binary') bin.Key = atta.name bin.Value = objectify.Element( 'Value', Ref=str(bId)) entry.append(bin) else: print( 'Warning: File \'{0}\' was skipped because it exceeds the 1MB Keepass filesize limit.' .format(atta.name)) except Exception as e: pass objectify.deannotate(root, xsi_nil=True) etree.cleanup_namespaces(root) kdb.clear_credentials() kdb.add_credentials(password=master_password) with open(filename, 'wb') as output: kdb.write_to(output)
def parse_mets(self):
    """
    Parse METS file and save data to DIP, DigitalFile, and PremisEvent models
    """
    # Open xml file and strip namespaces
    tree = etree.parse(self.path)
    root = tree.getroot()

    for elem in root.getiterator():
        if not hasattr(elem.tag, 'find'):
            continue  # (1)
        i = elem.tag.find('}')
        if i >= 0:
            elem.tag = elem.tag[i + 1:]
    objectify.deannotate(root, cleanup_namespaces=True)

    # Create dict for names and xpaths of desired info from individual files
    xml_file_elements = {
        'filepath': './techMD/mdWrap/xmlData/object/originalName',
        'uuid': './techMD/mdWrap/xmlData/object/objectIdentifier/objectIdentifierValue',
        'hashtype': './techMD/mdWrap/xmlData/object/objectCharacteristics/fixity/messageDigestAlgorithm',
        'hashvalue': './techMD/mdWrap/xmlData/object/objectCharacteristics/fixity/messageDigest',
        'bytes': './techMD/mdWrap/xmlData/object/objectCharacteristics/size',
        'format': './techMD/mdWrap/xmlData/object/objectCharacteristics/format/formatDesignation/formatName',
        'version': './techMD/mdWrap/xmlData/object/objectCharacteristics/format/formatDesignation/formatVersion',
        'puid': './techMD/mdWrap/xmlData/object/objectCharacteristics/format/formatRegistry/formatRegistryKey',
        'fits_modified_unixtime': './techMD/mdWrap/xmlData/object/objectCharacteristics/objectCharacteristicsExtension/fits/fileinfo/fslastmodified[@toolname="OIS File Information"]',
        'fits_modified': './techMD/mdWrap/xmlData/object/objectCharacteristics/objectCharacteristicsExtension/fits/toolOutput/tool[@name="Exiftool"]/exiftool/FileModifyDate',
    }

    # Build xml document root
    mets_root = root

    # Get DIP object
    dip = DIP.objects.get(pk=self.dip_id)

    # Gather info for each file in filegroup "original"
    for target in mets_root.findall(".//fileGrp[@USE='original']/file"):

        # Create new dictionary for this item's info
        file_data = dict()

        # Create new list of dicts for premis events in file_data
        file_data['premis_events'] = list()

        # Gather amdsec id from filesec
        amdsec_id = target.attrib['ADMID']
        file_data['amdsec_id'] = amdsec_id

        # Parse amdSec
        amdsec_xpath = ".//amdSec[@ID='{}']".format(amdsec_id)
        for target1 in mets_root.findall(amdsec_xpath):

            # Iterate over elements and write key, value for each to file_data dictionary
            for key, value in xml_file_elements.items():
                try:
                    file_data[key] = target1.find(value).text
                except AttributeError:
                    file_data[key] = ''

            # Parse premis events related to file
            premis_event_xpath = ".//digiprovMD/mdWrap[@MDTYPE='PREMIS:EVENT']"
            for target2 in target1.findall(premis_event_xpath):

                # Create dict to store data
                premis_event = dict()

                # Create dict for names and xpaths of desired elements
                premis_key_values = {
                    'event_uuid': './xmlData/event/eventIdentifier/eventIdentifierValue',
                    'event_type': './xmlData/event/eventType',
                    'event_datetime': './xmlData/event/eventDateTime',
                    'event_detail': './xmlData/event/eventDetail',
                    'event_outcome': './xmlData/event/eventOutcomeInformation/eventOutcome',
                    'event_detail_note': './xmlData/event/eventOutcomeInformation/eventOutcomeDetail/eventOutcomeDetailNote',
                }

                # Iterate over elements and write key, value for each to premis_event dictionary
                for key, value in premis_key_values.items():
                    try:
                        premis_event[key] = target2.find(value).text
                    except AttributeError:
                        premis_event[key] = ''

                # Write premis_event dict to file_data
                file_data['premis_events'].append(premis_event)

        # Format filepath
        file_data['filepath'] = file_data['filepath'].replace('%transferDirectory%', '')

        # Create human-readable size
        file_data['bytes'] = int(file_data['bytes'])
        file_data['size'] = '0 bytes'  # Default to none
        if file_data['bytes'] != 0:
            file_data['size'] = convert_size(file_data['bytes'])

        # Create human-readable version of last modified Unix time stamp
        # (if file was characterized by FITS)
        if file_data['fits_modified_unixtime']:
            unixtime = int(file_data['fits_modified_unixtime']) / 1000  # convert milliseconds to seconds
            file_data['modified_ois'] = datetime.datetime.fromtimestamp(unixtime).isoformat()  # convert from unix to iso8601
        else:
            file_data['modified_ois'] = ''

        # Add file_data to DigitalFile model
        digitalfile = DigitalFile(
            uuid=file_data['uuid'],
            filepath=file_data['filepath'],
            fileformat=file_data['format'],
            formatversion=file_data['version'],
            size_bytes=file_data['bytes'],
            size_human=file_data['size'],
            datemodified=file_data['modified_ois'],
            puid=file_data['puid'],
            amdsec=file_data['amdsec_id'],
            hashtype=file_data['hashtype'],
            hashvalue=file_data['hashvalue'],
            dip=dip,
        )
        digitalfile.save()

        # Add premis events data to PREMISEvent model
        for event in file_data['premis_events']:
            premisevent = PREMISEvent(
                uuid=event['event_uuid'],
                eventtype=event['event_type'],
                datetime=event['event_datetime'],
                detail=event['event_detail'],
                outcome=event['event_outcome'],
                detailnote=event['event_detail_note'],
                digitalfile=DigitalFile.objects.get(uuid=file_data['uuid']),
            )
            premisevent.save()

    # Gather dublin core metadata from most recent dmdSec
    dc_model = self.parse_dc(root)

    # Update DIP model object - not ispartof (hardset)
    if dc_model:
        if dc_model['title']:
            dip.dc.title = dc_model['title']
        if dc_model['creator']:
            dip.dc.creator = dc_model['creator']
        if dc_model['subject']:
            dip.dc.subject = dc_model['subject']
        if dc_model['description']:
            dip.dc.description = dc_model['description']
        if dc_model['publisher']:
            dip.dc.publisher = dc_model['publisher']
        if dc_model['contributor']:
            dip.dc.contributor = dc_model['contributor']
        if dc_model['date']:
            dip.dc.date = dc_model['date']
        if dc_model['type']:
            dip.dc.type = dc_model['type']
        if dc_model['format']:
            dip.dc.format = dc_model['format']
        if dc_model['source']:
            dip.dc.source = dc_model['source']
        if dc_model['language']:
            dip.dc.language = dc_model['language']
        if dc_model['coverage']:
            dip.dc.coverage = dc_model['coverage']
        if dc_model['rights']:
            dip.dc.rights = dc_model['rights']
        dip.dc.save()

    # Trigger ES update
    dip.save()
def build_gz_figurine_req(name, frame_id, x, y, z, roll, pitch, yaw,
                          width, height, depth, mass, color):
    """ create the gazebo SpawnModel service request for a figurine """
    p = Pose()
    p.position.x = x
    p.position.y = y
    p.position.z = z
    q = transform.transformations.quaternion_from_euler(roll, pitch, yaw)
    p.orientation.x = q[0]
    p.orientation.y = q[1]
    p.orientation.z = q[2]
    p.orientation.w = q[3]

    model = SpawnModelRequest()
    model.model_name = name

    sdf = objectify.Element('sdf', version='1.4')
    sdf.model = objectify.Element('model', name=name)
    sdf.model.static = 'true'
    sdf.model.link = objectify.Element('link', name='link')

    sdf.model.link.inertial = objectify.Element('inertial')
    sdf.model.link.inertial.mass = mass
    xx = mass / 12.0 * (height * height + depth * depth)
    yy = mass / 12.0 * (width * width + depth * depth)
    zz = mass / 12.0 * (width * width + height * height)
    sdf.model.link.inertial.inertia = objectify.Element('inertia')
    sdf.model.link.inertial.inertia.ixx = xx
    sdf.model.link.inertial.inertia.iyy = yy
    sdf.model.link.inertial.inertia.izz = zz
    sdf.model.link.inertial.inertia.ixy = 0.0
    sdf.model.link.inertial.inertia.iyz = 0.0
    sdf.model.link.inertial.inertia.ixz = 0.0

    sdf.model.link.collision = objectify.Element('collision', name='collision')
    sdf.model.link.collision.geometry = objectify.Element('geometry')
    sdf.model.link.collision.geometry.box = objectify.Element('box')
    sdf.model.link.collision.geometry.box.size = '{} {} {}'.format(width, height, depth)
    sdf.model.link.collision.surface = objectify.Element('surface')
    sdf.model.link.collision.surface.friction = objectify.Element('friction')
    sdf.model.link.collision.surface.friction.ode = objectify.Element('ode')
    sdf.model.link.collision.surface.friction.ode.mu = 1000000
    sdf.model.link.collision.surface.friction.ode.mu2 = 1000000
    sdf.model.link.collision.surface.friction.ode.fdir1 = '1.0 1.0 1.0'
    sdf.model.link.collision.surface.friction.ode.slip1 = 0.0
    sdf.model.link.collision.surface.friction.ode.slip2 = 0.0
    sdf.model.link.collision.surface.bounce = objectify.Element('bounce')
    sdf.model.link.collision.surface.bounce.restitution_coefficient = 0.0
    sdf.model.link.collision.surface.bounce.threshold = 100000.0
    sdf.model.link.collision.surface.contact = objectify.Element('contact')
    sdf.model.link.collision.surface.contact.ode = objectify.Element('ode')
    sdf.model.link.collision.surface.contact.ode.soft_cfm = 0.0
    sdf.model.link.collision.surface.contact.ode.soft_erp = 0.2
    sdf.model.link.collision.surface.contact.ode.kp = 10000000
    sdf.model.link.collision.surface.contact.ode.kd = 1
    sdf.model.link.collision.surface.contact.ode.max_vel = 0.0
    sdf.model.link.collision.surface.contact.ode.min_depth = 0.001

    sdf.model.link.visual = objectify.Element('visual', name='visual')
    sdf.model.link.visual.geometry = objectify.Element('geometry')
    sdf.model.link.visual.geometry.box = objectify.Element('box')
    sdf.model.link.visual.geometry.box.size = '{} {} {}'.format(width, height, depth)
    sdf.model.link.visual.material = objectify.Element('material')
    sdf.model.link.visual.material.script = objectify.Element('script')
    sdf.model.link.visual.material.script.uri = 'file://media/materials/scripts/gazebo.material'
    sdf.model.link.visual.material.script.name = 'Gazebo/{}'.format(color)

    objectify.deannotate(sdf)
    etree.cleanup_namespaces(sdf)

    model.model_xml = etree.tostring(sdf, encoding='utf-8', xml_declaration=True).decode("utf-8")
    model.robot_namespace = "figurine_spawner"
    model.initial_pose = p
    model.reference_frame = frame_id

    # return etree.tostring(sdf, encoding='utf-8', xml_declaration=True)
    return model
graphicFrame_xml = graphicFrame_tmpl % (sp_id, name, x, y, cx, cy)
graphicFrame = objectify.fromstring(graphicFrame_xml)

tbl = graphicFrame[qn('a:graphic')].graphicData.tbl
for row in range(rows):
    # tr = sub_elm(tbl, 'a:tr', h=rowheight)
    tr = new('a:tr', h=rowheight)
    for col in range(cols):
        sub_elm(tbl.tblGrid, 'a:gridCol', w=colwidth)
        tr.append(empty_cell())
    tbl.append(tr)

objectify.deannotate(graphicFrame, cleanup_namespaces=True)
print etree.tostring(graphicFrame, pretty_print=True)

# ============================================================================
# early experiments with objectify
# ============================================================================
# def tagname(tag):
#     tag_parts = tag.split('}')
#     return tag_parts[1]
#
# def print_subtree(elm, indent=0):
#     indent_spaces = indent * ' '
#     print '%s%s' % (indent_spaces, tagname(elm.tag))
#     for child in elm.iterchildren():
#         print_subtree(child, indent+2)
def render(self, pretty_print=True):
    objectify.deannotate(self.xml, xsi_nil=True)
    etree.cleanup_namespaces(self.xml)
    return etree.tostring(self.xml, pretty_print=pretty_print)
def _reload(obj):
    obj = objectify.fromstring(etree.tostring(obj))
    objectify.deannotate(obj, xsi_nil=True, cleanup_namespaces=True)
    return obj
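The pattern above can be exercised in isolation; a minimal, self-contained sketch (the reload_clean helper and the demo element are illustrative, not taken from any of the snippets here):

from lxml import etree, objectify

def reload_clean(obj):
    # round-trip through a string, then drop objectify's py:pytype/xsi annotations
    obj = objectify.fromstring(etree.tostring(obj))
    objectify.deannotate(obj, xsi_nil=True, cleanup_namespaces=True)
    return obj

root = objectify.Element('demo')
root.count = 3  # objectify annotates this child with py:pytype="int"
print(etree.tostring(reload_clean(root), pretty_print=True).decode())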
def create_attribute_metadata(self, field_names):
    """
    Create the attribute metadata based on the field_names parameter

    Parameters
    ----------
    field_names: list
        Field names for which to get metadata

    Returns
    -------
    None
    """
    p = self.parameter_parser

    # Get the metadata associated with the attribute data
    structure_fields, structure_codes = \
        self.plot_db.get_structure_metadata(p.model_project)
    species_fields = \
        self.plot_db.get_species_metadata()

    # Create the metadata XML
    xml_schema_file = \
        'http://lemma.forestry.oregonstate.edu/xml/stand_attributes.xsd'
    root_str = """
        <attributes
            xmlns:xsi="%s"
            xsi:noNamespaceSchemaLocation="%s"/>
    """
    root_str = root_str % ('http://www.w3.org/2001/XMLSchema-instance',
                           xml_schema_file)
    root_elem = objectify.fromstring(root_str)

    for n in field_names:
        n = n.upper()
        other_fields = {}
        try:
            r = structure_fields[structure_fields.FIELD_NAME == n][0]
            other_fields['SPECIES_ATTR'] = 0
            other_fields['PROJECT_ATTR'] = r.PROJECT_ATTR
            other_fields['ACCURACY_ATTR'] = r.ACCURACY_ATTR
        except IndexError:
            try:
                r = species_fields[species_fields.FIELD_NAME == n][0]
                other_fields['SPECIES_ATTR'] = 1
                other_fields['PROJECT_ATTR'] = 1
                other_fields['ACCURACY_ATTR'] = 1
            except IndexError:
                err_msg = n + ' has no metadata'
                print err_msg
                continue

        # Add the attribute element
        attribute_elem = etree.SubElement(root_elem, 'attribute')

        # Add all metadata common to both structure and species recarrays
        fields = ('FIELD_NAME', 'FIELD_TYPE', 'UNITS', 'DESCRIPTION',
                  'SHORT_DESCRIPTION')
        for f in fields:
            child = etree.SubElement(attribute_elem, f.lower())
            attribute_elem[child.tag] = getattr(r, f)

        # Add special fields customized for structure and species
        fields = ('SPECIES_ATTR', 'PROJECT_ATTR', 'ACCURACY_ATTR')
        for f in fields:
            child = etree.SubElement(attribute_elem, f.lower())
            attribute_elem[child.tag] = other_fields[f]

        # Print out codes if they exist
        if r.CODED == True:
            codes_elem = etree.SubElement(attribute_elem, 'codes')
            try:
                c_records = \
                    structure_codes[structure_codes.FIELD_NAME == n]
            except IndexError:
                # try:
                #     c_records = \
                #         species_codes[species_codes.FIELD_NAME == n]
                # except IndexError:
                err_msg = 'Codes were not found for ' + n
                print err_msg
                continue
            for c_rec in c_records:
                code_elem = etree.SubElement(codes_elem, 'code')
                c_fields = ('CODE_VALUE', 'DESCRIPTION', 'LABEL')
                for c in c_fields:
                    child = etree.SubElement(code_elem, c.lower())
                    code_elem[child.tag] = getattr(c_rec, c)

    tree = root_elem.getroottree()
    objectify.deannotate(tree)
    etree.cleanup_namespaces(tree)

    # Ensure that this tree validates against the schema file
    utilities.validate_xml(tree, xml_schema_file)

    # Write out this metadata file
    metadata_file = p.stand_metadata_file
    tree.write(metadata_file, pretty_print=True)
def new_txBody():
    """Return a new ``<p:txBody>`` element tree"""
    xml = CT_TextBody._txBody_tmpl
    txBody = parse_xml_bytes(xml)
    objectify.deannotate(txBody, cleanup_namespaces=True)
    return txBody
def _write(self):
    objectify.deannotate(self._xml)
    etree.cleanup_namespaces(self._xml)
    xml_data = etree.tostring(self._xml, pretty_print=True)
    _write_storefile(self._path, XMLTransactionState.FILENAME, xml_data)
def parse_mets(self):
    """
    Parse METS file and save data to METS model
    """
    # create list
    original_files = []
    original_file_count = 0

    # get METS file name
    mets_filename = os.path.basename(self.path)

    # open xml file and strip namespaces
    tree = etree.parse(self.path)
    root = tree.getroot()

    for elem in root.getiterator():
        if not hasattr(elem.tag, 'find'):
            continue  # (1)
        i = elem.tag.find('}')
        if i >= 0:
            elem.tag = elem.tag[i + 1:]
    objectify.deannotate(root, cleanup_namespaces=True)

    # create dict for names and xpaths of desired info from individual files
    xml_file_elements = {
        'filepath': './techMD/mdWrap/xmlData/object/originalName',
        'uuid': './techMD/mdWrap/xmlData/object/objectIdentifier/objectIdentifierValue',
        'hashtype': './techMD/mdWrap/xmlData/object/objectCharacteristics/fixity/messageDigestAlgorithm',
        'hashvalue': './techMD/mdWrap/xmlData/object/objectCharacteristics/fixity/messageDigest',
        'bytes': './techMD/mdWrap/xmlData/object/objectCharacteristics/size',
        'format': './techMD/mdWrap/xmlData/object/objectCharacteristics/format/formatDesignation/formatName',
        'version': './techMD/mdWrap/xmlData/object/objectCharacteristics/format/formatDesignation/formatVersion',
        'puid': './techMD/mdWrap/xmlData/object/objectCharacteristics/format/formatRegistry/formatRegistryKey',
        'fits_modified_unixtime': './techMD/mdWrap/xmlData/object/objectCharacteristics/objectCharacteristicsExtension/fits/fileinfo/fslastmodified[@toolname="OIS File Information"]',
        'fits_modified': './techMD/mdWrap/xmlData/object/objectCharacteristics/objectCharacteristicsExtension/fits/toolOutput/tool[@name="Exiftool"]/exiftool/FileModifyDate'
    }

    # build xml document root
    mets_root = root

    # gather info for each file in filegroup "original"
    for target in mets_root.findall(".//fileGrp[@USE='original']/file"):

        original_file_count += 1

        # create new dictionary for this item's info
        file_data = dict()

        # create new list of dicts for premis events in file_data
        file_data['premis_events'] = list()

        # gather amdsec id from filesec
        amdsec_id = target.attrib['ADMID']
        file_data['amdsec_id'] = amdsec_id

        # parse amdSec
        amdsec_xpath = ".//amdSec[@ID='{}']".format(amdsec_id)
        for target1 in mets_root.findall(amdsec_xpath):

            # iterate over elements and write key, value for each to file_data dictionary
            for key, value in xml_file_elements.items():
                try:
                    file_data[key] = target1.find(value).text
                except AttributeError:
                    file_data[key] = ''

            # parse premis events related to file
            premis_event_xpath = ".//digiprovMD/mdWrap[@MDTYPE='PREMIS:EVENT']"
            for target2 in target1.findall(premis_event_xpath):

                # create dict to store data
                premis_event = dict()

                # create dict for names and xpaths of desired elements
                premis_key_values = {
                    'event_uuid': './xmlData/event/eventIdentifier/eventIdentifierValue',
                    'event_type': './xmlData/event/eventType',
                    'event_datetime': './xmlData/event/eventDateTime',
                    'event_detail': './xmlData/event/eventDetail',
                    'event_outcome': './xmlData/event/eventOutcomeInformation/eventOutcome',
                    'event_detail_note': './xmlData/event/eventOutcomeInformation/eventOutcomeDetail/eventOutcomeDetailNote'
                }

                # iterate over elements and write key, value for each to premis_event dictionary
                for key, value in premis_key_values.items():
                    try:
                        premis_event[key] = target2.find(value).text
                    except AttributeError:
                        premis_event[key] = ''

                # write premis_event dict to file_data
                file_data['premis_events'].append(premis_event)

        # format filepath
        file_data['filepath'] = file_data['filepath'].replace('%transferDirectory%', '')

        # format PUID
        if 'fido' not in file_data['puid'].lower():
            file_data['puid'] = "<a href=\"http://nationalarchives.gov.uk/PRONOM/%s\" target=\"_blank\">%s</a>" % (
                file_data['puid'], file_data['puid'])

        # create human-readable size
        file_data['bytes'] = int(file_data['bytes'])
        file_data['size'] = '0 bytes'  # default to none
        if file_data['bytes'] != 0:
            file_data['size'] = convert_size(file_data['bytes'])

        # create human-readable version of last modified Unix time stamp (if file was characterized by FITS)
        if file_data['fits_modified_unixtime']:
            unixtime = int(file_data['fits_modified_unixtime']) / 1000  # convert milliseconds to seconds
            file_data['modified_ois'] = datetime.datetime.fromtimestamp(unixtime).isoformat()  # convert from unix to iso8601
        else:
            file_data['modified_ois'] = ''

        # append file_data to original files
        original_files.append(file_data)

    # gather dublin core metadata from most recent dmdSec
    dc_metadata = self.parse_dc(root)

    # add file info to database
    mets_instance = METS(mets_filename, self.nickname, original_files,
                         dc_metadata, original_file_count)
    db.session.add(mets_instance)
    db.session.commit()
def tostring(self):
    """Returns object as xml string"""
    objectify.deannotate(self._xml)
    etree.cleanup_namespaces(self._xml)
    return etree.tostring(self._xml, pretty_print=True)
def clean_html(csvfilename, htmlfilename):
    # read CSV, find good elements
    with open(csvfilename, "rb") as csvfile:
        csvin = UnicodeReader(skip_bom(csvfile))
        table = [row for row in csvin]
    header, table = table[0], table[1:]
    i = header.index('xpath')
    j = header.index('goodness')
    goodxpaths = [row[i] for row in table if int(row[j])]
    logger.debug('%d good element found out of %d' % (len(goodxpaths), len(table)))
    assert(goodxpaths)  # at least something
    assert(all(x and isinstance(x, basestring) for x in goodxpaths))

    # read HTML, build DOM tree
    parser = etree.HTMLParser(encoding='utf8', remove_blank_text=True,
                              remove_comments=True, remove_pis=True)
    htmlstr = open(htmlfilename).read()
    dom = etree.fromstring(htmlstr, parser)
    domtree = etree.ElementTree(dom)
    objectify.deannotate(domtree, cleanup_namespaces=True)

    # scan DOM tree, remove bad stuff
    for elem in list(domtree.iter()):
        this_xpath = domtree.getpath(elem)
        if elem.attrib:
            keepattrib = ['href'] if elem.tag == 'a' else ['src', 'title', 'alt'] if elem.tag == 'img' else []
            for k in elem.attrib:
                if k not in keepattrib:
                    del elem.attrib[k]
        if not this_xpath:
            logger.debug('?? %s %s' % (elem, repr(this_xpath)))
            continue  # no xpath found, probably deleted?
        elif elem.tag in ['script', 'meta']:
            # some element is removable for sure
            if not prune(elem):
                logger.error('Cannot find parent of %s' % this_xpath)
            else:
                # logger.debug('Removed %s' % this_xpath)
                pass
        elif this_xpath in goodxpaths:
            logger.debug('Keep good element %s' % this_xpath)
            continue  # this is not boilerplate, keep it
        elif not prefixOfSomething(this_xpath, goodxpaths):
            # this is prefix of nothing -> unwanted child but perhaps has a good tail
            if not somethingIsPrefix(this_xpath, goodxpaths):
                # it is prefix of nothing -> safe to get rid of tail
                elem.tail = ''
            if not prune(elem):
                logger.error('Cannot find parent of %s' % this_xpath)
            else:
                logger.debug('Removed %s' % this_xpath)
        elif prefixOfSomething(this_xpath, goodxpaths):
            # it is prefix of something -> remove all inner text but retain children
            if not somethingIsPrefix(this_xpath, goodxpaths):
                # nothing is prefix of this -> remove tail as well
                if elem.tail:
                    elem.tail = ''
            if elem.text:
                elem.text = ''
            for child in elem:
                if child.tail:
                    child.tail = ''
            logger.debug('Removed text of %s but keep children' % this_xpath)
        else:
            logger.error('Unhandled element %s' % this_xpath)

    # more clean up: remove some elements
    all_done = False
    allow_empty = ['br', 'tr', 'img']
    while not all_done:
        for elem in domtree.iter():
            parent = elem.getparent()
            if parent is None:
                continue
            if len(elem) == 0 and isempty(elem.text) and elem.tag not in allow_empty:
                prune(elem)
                break
            if len(elem) == 1 and elem.tag == 'div' and isempty(elem.text) and isempty(elem[0].tail):
                delete(elem)
                break
            if elem.text and elem.tag not in ['pre', 'code']:
                elem.text = re.sub(r'\s+', ' ', elem.text)
            if elem.tail and parent.tag not in ['pre', 'code']:
                elem.tail = re.sub(r'\s+', ' ', elem.tail)
        else:
            # finished for-loop without break, i.e., without deleting nodes in domtree
            all_done = True

    # stringify cleaned HTML
    etree.strip_tags(domtree, 'span')  # pandoc will keep span tag if not removed
    goodhtml = etree.tostring(domtree, encoding='utf-8', pretty_print=True, method="html")
    return goodhtml
def open_xml(p):
    from lxml import etree, objectify
    root = etree.parse(p.open_text())
    objectify.deannotate(root, cleanup_namespaces=True)
    return XmlSheet(p.name, source=root)
def new_tc():
    """Return a new ``<a:tc>`` element tree"""
    xml = CT_TableCell._tc_tmpl
    tc = oxml_fromstring(xml)
    objectify.deannotate(tc, cleanup_namespaces=True)
    return tc
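The same template-based factory pattern can be sketched with plain lxml calls; the _TC_TMPL string and new_tc_sketch name below are illustrative stand-ins for the pptx internals (CT_TableCell._tc_tmpl, oxml_fromstring) used above:

from lxml import objectify

_TC_TMPL = (
    '<a:tc xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">'
    '<a:txBody><a:bodyPr/><a:p/></a:txBody><a:tcPr/></a:tc>'
)

def new_tc_sketch():
    # parse the canned template, then strip objectify's annotations
    tc = objectify.fromstring(_TC_TMPL)
    objectify.deannotate(tc, cleanup_namespaces=True)
    return tc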
def _return_to_standard_xml(v):
    # Remove lxml.objectify DataType namespace prefixes:
    objectify.deannotate(v)
    # Put the default namespace back:
    _reinsert_root_tag_prefix(v)
    etree.cleanup_namespaces(v)
def get_aligned_frames_xml(tokenized, frame_instances, root):
    # read DRG
    tuples = get_drg(tokenized)
    drgparser = drg.DRGParser()
    d = drgparser.parse_tup_lines(tuples)

    for instance_id, frame_instance in frame_instances.iteritems():
        if len(frame_instance['roles']) > 0:
            try:
                framebase_id = "{0}-{1}".format(
                    frame_instance['frame'],
                    offset2wn[frame_instance['synset']].split("#")[0].replace('-', '.'))
            except:
                log.info('No mapping found for synset {0}'.format(frame_instance['synset']))
                continue

            tag_frameinstance = objectify.SubElement(root, "frameinstance")
            tag_frameinstance.attrib['id'] = instance_id
            tag_frameinstance.attrib['type'] = framebase_id
            tag_frameinstance.attrib['internalvariable'] = frame_instance['variable']

            for reificated_frame_var in d.reificated[frame_instance['variable']]:
                tag_framelexicalization = objectify.SubElement(tag_frameinstance, "framelexicalization")
                surface = []
                unboxer.generate_from_referent(d, reificated_frame_var, surface, complete=False)
                tag_framelexicalization[0] = ' '.join(surface)

                tag_instancelexicalization = objectify.SubElement(tag_frameinstance, "instancelexicalization")
                surface = []
                unboxer.generate_from_referent(d, reificated_frame_var, surface, complete=True)
                tag_instancelexicalization[0] = ' '.join(surface)

                tag_frameelements = objectify.SubElement(tag_frameinstance, "frameelements")
                for role, (variable, filler) in frame_instance['roles'].iteritems():
                    tag_frameelement = objectify.SubElement(tag_frameelements, "frameelement")
                    tag_frameelement.attrib['role'] = role
                    tag_frameelement.attrib['internalvariable'] = variable
                    tag_concept = objectify.SubElement(tag_frameelement, "concept")
                    tag_concept[0] = filler
                    try:
                        for reificated_role_var in d.reificated[variable]:
                            # composed lexicalization
                            surface = unboxer.generate_from_relation(d, reificated_frame_var, reificated_role_var)
                            if surface != None:
                                tag_rolelexicalization = objectify.SubElement(tag_frameelement, "rolelexicalization")
                                tag_rolelexicalization[0] = surface
                            # complete surface forms
                            surface = []
                            unboxer.generate_from_referent(d, reificated_role_var, surface, complete=True)
                            tag_conceptlexicalization = objectify.SubElement(tag_frameelement, "conceptlexicalization")
                            tag_conceptlexicalization[0] = ' '.join(surface)
                    except:
                        log.error("error with DRG reification: {0}".format(variable))

    objectify.deannotate(root, xsi_nil=True)
    etree.cleanup_namespaces(root)
    return etree.tostring(root, pretty_print=True)
def _strip_namespaces(self):
    root = etree.fromstring(etree.tostring(self.tree))
    for element in root.getiterator():
        element.tag = etree.QName(element).localname
    objectify.deannotate(root, cleanup_namespaces=True)
    return root
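The same namespace-stripping idea can be run standalone on a small document; the sample XML and the strip_namespaces name in this sketch are illustrative:

from lxml import etree, objectify

def strip_namespaces(tree):
    # serialize and re-parse, rewrite each tag to its local name,
    # then drop the now-unused namespace declarations
    root = etree.fromstring(etree.tostring(tree))
    for element in root.getiterator():
        element.tag = etree.QName(element).localname
    objectify.deannotate(root, cleanup_namespaces=True)
    return root

doc = etree.fromstring('<m:root xmlns:m="urn:example"><m:child>x</m:child></m:root>')
print(strip_namespaces(doc).find('child').text)  # 'x', no namespace prefix required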
def getvalue(self, serialize=True):
    """ Gets the actual payload's value converted to a string representing
    either XML or JSON.
    """
    if self.zato_is_xml:
        if self.zato_is_repeated:
            value = Element('item_list')
        else:
            value = Element('item')
    else:
        if self.zato_is_repeated:
            value = []
        else:
            value = {}

    if self.zato_is_repeated:
        output = self.zato_output
    else:
        output = set(dir(self)) & self.zato_all_attrs
        output = [dict((name, getattr(self, name)) for name in output)]

    if output:
        # All elements must be of the same type so it's OK to do it
        is_sa_namedtuple = isinstance(output[0], NamedTuple)

        for item in output:
            if self.zato_is_xml:
                out_item = Element('item')
            else:
                out_item = {}
            for is_required, name in chain(self.zato_required, self.zato_optional):
                leave_as_is = isinstance(name, AsIs)
                elem_value = self._getvalue(name, item, is_sa_namedtuple, is_required, leave_as_is)

                if isinstance(name, ForceType):
                    name = name.name

                if isinstance(elem_value, basestring):
                    elem_value = elem_value.decode('utf-8')

                if self.zato_is_xml:
                    setattr(out_item, name, elem_value)
                else:
                    out_item[name] = elem_value

            if self.zato_is_repeated:
                value.append(out_item)
            else:
                value = out_item

    if self.zato_is_xml:
        em = ElementMaker(annotate=False, namespace=self.namespace, nsmap={None: self.namespace})
        zato_env = em.zato_env(em.cid(self.zato_cid), em.result(ZATO_OK))
        top = getattr(em, self.response_elem)(zato_env)
        top.append(value)
    else:
        top = {self.response_elem: value}

    if serialize:
        if self.zato_is_xml:
            deannotate(top, cleanup_namespaces=True)
            return etree.tostring(top)
        else:
            return dumps(top)
    else:
        return top
elem.text = f"@color/{color_element_name}" style.append(elem) color = "black" colorBackground = "#010101" prefix = f"ColorPatch.{color.capitalize()}" style = Element("style", {"name": prefix+"_Background"}) styles_root.append(style) elem = Element("item", {"name": "android:colorBackground"}) elem.text = colorBackground style.append(elem) elem = Element("item", {"name": "colorSurface"}) elem.text = colorBackground style.append(elem) objectify.deannotate(styles_root, cleanup_namespaces=True) etree.ElementTree(styles_root)\ .write(path_dest_styles_xml, encoding='utf-8', pretty_print=True, xml_declaration=True) objectify.deannotate(colors_root, cleanup_namespaces=True) etree.ElementTree(colors_root)\ .write(path_dest_colors_xml, encoding='utf-8', pretty_print=True, xml_declaration=True) # kt primaryStyles = [] secondaryStyles = [] backgroundStyles = []