def __init__(self, parent, dom):
    super(Interface, self).__init__(parent, dom)
    # build lists of requests, events, and enums in this interface
    self.requests = build_list(self, Request, dom.getElementsByTagName('request'))
    self.events = build_list(self, Event, dom.getElementsByTagName('event'))
    self.enums = build_list(self, Enum, dom.getElementsByTagName('enum'))
def DetermineMDType(RawXMLFile):
    '''
    A series of tests is performed to determine the format of the metadata
    record (these tests look at the structure and content of the XML for
    characteristics unique to certain formats).
    '''
    dom = minidom.parse(RawXMLFile)
    XMLElements = [node.tagName for node in dom.getElementsByTagName("*")]
    arcpy.AddMessage("A series of tests will now be performed to determine the current format of the metadata record... \n")
    # Check 1. Determine if the metadata contains information identifying it as an Arc10 style record.
    if "mdStanName" in XMLElements:
        mdStanName = dom.getElementsByTagName('mdStanName')[0].firstChild.data
        if str(mdStanName) == "ArcGIS Metadata":
            arcpy.AddMessage("The 'mdStanName' element was found in the data layer and contains the value 'ArcGIS Metadata.' This data layer has been determined to have Arc10 style metadata. \n")
            del dom
            return "Arc10"
        else:
            arcpy.AddMessage("The 'mdStanName' element was found in the data layer but did not contain the value 'ArcGIS Metadata.' Subsequent checks will be performed to determine the format of the metadata. \n")
    # Check 2. Determine if the metadata has one or more of several elements unique to Arc10 style records.
    KeyElementCheckList = ["idPurp", "idAbs", "idCredit", "searchKeys"]
    KeyElementCounter = 0
    for KeyElement in KeyElementCheckList:
        if KeyElement in XMLElements:
            KeyElementCounter += 1
    if KeyElementCounter > 0:
        arcpy.AddMessage("Out of 4 elements unique to Arc10 style metadata ('idPurp', 'idAbs', 'idCredit', and 'searchKeys'), " + str(KeyElementCounter) + " were found. This data layer has been determined to have Arc10 style metadata. \n")
        return "Arc10"
    # Check 3. Determine if the metadata has one or more of several elements unique to FGDC style records, in a particular structure.
    try:
        idinfo = dom.getElementsByTagName("idinfo")[0]
        citation = idinfo.getElementsByTagName('citation')[0]
        citeinfo = citation.getElementsByTagName('citeinfo')[0]
        metainfo = dom.getElementsByTagName("metainfo")[0]
        metstdn = metainfo.getElementsByTagName("metstdn")[0]
        if citeinfo is not None and metstdn is not None:
            arcpy.AddMessage("Based on certain characteristics of the XML, this metadata record has been identified as an FGDC-style record. \n")
            return "FGDC"
        else:
            return "Unknown"
    except:
        return "Unknown"
def load_from_xml(self, dom):
    for tagname in STYLE_TAGS:
        for style in dom.getElementsByTagName(tagname):
            if style.nodeType == xml.dom.Node.ELEMENT_NODE:
                styleobj = OdtStyle(style)
                self.__styles[styleobj.name] = styleobj
    for tagname in FONT_TAGS:
        for font in dom.getElementsByTagName(tagname):
            if font.nodeType == xml.dom.Node.ELEMENT_NODE:
                fontobj = Font(font)
                self.__fonts[fontobj.name] = fontobj
def _ReadXMLFile(self, fname):
    """Reads in an XML file.

    Args:
        fname: name of file
    Returns:
        (# users added, # users excluded)
        Users are excluded primarily for lack of a "dn" attribute.
    """
    f = codecs.open(fname, "r", "utf-8")
    dom = xml.dom.minidom.parseString(f.read())
    users = dom.getElementsByTagName("user")
    added = 0
    excluded = 0
    if len(self.attrs):
        enforceAttrList = True
    else:
        enforceAttrList = False
    for user in users:
        dn, db_user = self._ReadUserXML(user)
        if not dn:
            excluded += 1
        else:
            self._ReadAddUser(dn, db_user, enforceAttrList)
            added += 1
    f.close()
    return (added, excluded)
def _getGlyphPaths(self, dom):
    symbols = dom.getElementsByTagName('symbol')
    glyphPaths = {}
    for s in symbols:
        pathNode = [p for p in s.childNodes
                    if 'tagName' in dir(p) and p.tagName == 'path']
        glyphPaths[s.getAttribute('id')] = pathNode[0].getAttribute('d')
    return glyphPaths
def get_attribute_values(dom, tag, attribute, masks):
    '''Returns a list of the values of the given attribute found on tags with
    the given name, keeping only values that match one of the masks.'''
    if dom is None:
        return None
    values = []
    tag_nodes = dom.getElementsByTagName(tag)
    for tag_node in tag_nodes:
        if tag_node.hasAttribute(attribute):
            # TODO: values may be guarded by a condition, for example:
            # <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props"
            #         Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')"
            #         Label="LocalAppDataPlatform" />
            # so we should take that case into account.
            value = tag_node.getAttribute(attribute)
            if not masks:
                # allow any dependency
                values.append(value)
            else:
                # search for matches under mask
                for mask in masks:
                    if value.lower().endswith(mask):
                        values.append(value)
                        break
    return values
def __init__(self, file):
    self.fileToAnalise = file
    dom = FromXmlFile(file)
    doc = dom.documentElement
    objs = dom.getElementsByTagName("include")
    for obj in objs:
        temp = string.split(obj.getAttribute("url"), "/")
        inc_file = temp[len(temp) - 1]
        temp1 = string.split(file, "/")
        real_path = ""
        for i in range(0, len(temp1) - 1):
            real_path += "/" + temp1[i]
        inc_file = str(real_path) + "/" + str(inc_file)
        print "Found included file", inc_file
        dom1 = None
        try:
            doc1 = FromXmlFile(inc_file)
            dom1 = doc1.documentElement
        except Exception, msg:
            print "WARNING: ", msg
        if dom1 != None:
            includedNode = dom.importNode(dom1, 1)
            doc.appendChild(includedNode)
def load(self):
    d1 = self.dbhelper.dbRead(self.om_db, "xml_date")
    d2 = os.stat(self.defFile).st_mtime
    print "XML_CSL_LOAD", d1, d2
    if d1 != None:
        if d1 > d2:
            # FIXME: instead of failing, reset the DBs and rebuild them
            print "XML_CSL: OOPS, xml om definition is newer than om db"
            print "XML_CSL: please remove db files manually"
            return
        else:
            return
    print "XML_CSL: creating om DBs..."
    try:
        dom = xml.dom.minidom.parse(self.defFile)
    except:
        print "OMDB: cannot parse '%s'" % (self.defFile)
        return 0
    if dom.documentElement.localName != "comar-om":
        print "OMDB: '%s' is not a COMAR om dtd" % (self.defFile)
        return 0
    ns = dom.getElementsByTagName("namespace")[0]
    # FIXME: verify that the namespace is the same as the one in useNS
    self.namespace = ns.getAttribute("name")
    print "Adding OM Keys:",
    for node in ns.childNodes:
        self.load_node(node)
    dom.unlink()
    if d1 == None:
        self.dbhelper.dbWrite(self.om_db, "xml_date", str(d2))
    return 1
def _parse_gconf_schema(schema_file):
    ret = {}
    dom = xml.dom.minidom.parse(schema_file)
    for gconfschemafile in dom.getElementsByTagName('gconfschemafile'):
        for schemalist in gconfschemafile.getElementsByTagName('schemalist'):
            for schema in schemalist.getElementsByTagName('schema'):
                try:
                    key = schema.getElementsByTagName('applyto')[0].childNodes[0].data
                except IndexError:
                    # huh, no <applyto>? let's use <key>; it has a /schemas/
                    # prefix, but it should be clear enough
                    key = schema.getElementsByTagName('key')[0].childNodes[0].data
                    if key.startswith('/schemas/'):
                        key = key[8:]
                type = schema.getElementsByTagName('type')[0].childNodes[0].data
                try:
                    default = schema.getElementsByTagName('default')[0].childNodes[0].data
                    if type == 'bool':
                        if default.lower() == 'true':
                            ret[key] = 'true'
                        else:
                            ret[key] = 'false'
                    else:
                        ret[key] = default
                except IndexError:
                    ret[key] = ''  # no gconf default
    return ret
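# For reference, a minimal .schemas fragment of the shape that
# _parse_gconf_schema above walks (an illustrative sketch inferred from the
# tag names used in the code, not taken from the source document):
#
#   <gconfschemafile>
#     <schemalist>
#       <schema>
#         <key>/schemas/apps/myapp/enable_foo</key>
#         <applyto>/apps/myapp/enable_foo</applyto>
#         <type>bool</type>
#         <default>true</default>
#       </schema>
#     </schemalist>
#   </gconfschemafile>
#
# Parsing a file with this content would yield
# {'/apps/myapp/enable_foo': 'true'}.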
def _parse_gconf_schema(schema_file):
    ret = {}
    dom = xml.dom.minidom.parse(schema_file)
    for gconfschemafile in dom.getElementsByTagName('gconfschemafile'):
        for schemalist in gconfschemafile.getElementsByTagName('schemalist'):
            for schema in schemalist.getElementsByTagName('schema'):
                key = schema.getElementsByTagName('applyto')[0].childNodes[0].data
                type = schema.getElementsByTagName('type')[0].childNodes[0].data
                try:
                    default = schema.getElementsByTagName('default')[0].childNodes[0].data
                    if type == 'bool':
                        # 'default' is a string such as 'true' or 'false',
                        # so test its value rather than its truthiness
                        if default.lower() == 'true':
                            ret[key] = 'true'
                        else:
                            ret[key] = 'false'
                    else:
                        ret[key] = default
                except IndexError:
                    ret[key] = ''  # no gconf default
    return ret
def collect_meshes(dom, dirname):
    meshes = {}
    meshparser = xml.sax.make_parser()
    meshhandler = ogre_import.OgreMeshSaxHandler()
    meshparser.setContentHandler(meshhandler)
    meshlist = dom.getElementsByTagName("entity")
    for meshnode in meshlist:
        if meshnode.nodeType == xml.dom.Node.ELEMENT_NODE:
            meshfilename = meshnode.getAttribute("meshFile")
            if meshfilename != "":
                meshfile = Blender.sys.join(dirname, meshfilename)
                # is this a mesh file instead of an xml file?
                if meshfilename.lower().find('.xml') == -1:
                    meshfilename = meshfilename[0:meshfilename.lower().find('.mesh')]
                else:
                    meshfilename = meshfilename[0:meshfilename.lower().find('.mesh.xml')]
                dlog("Meshfilename %s" % meshfilename)
                if not meshes.has_key(meshfilename):
                    dlog("Not cached yet")
                    mesh = parseMesh(meshfile, meshparser, meshhandler)
                    if mesh != None:
                        mesh.filename = meshfile
                        mesh.name = meshnode.getAttribute("name")
                        meshes[meshfilename] = mesh
                else:
                    dlog("Cached")
    log("Mesh import done")
    return meshes
def appendStoryallXml(self, xmlFile, templates_path, result_path):
    os.system("cp " + xmlFile + " " + xmlFile + ".old")
    doc = FromXmlFile(xmlFile)
    dom = doc.getElementsByTagName("root").item(0)
    for file in os.listdir(templates_path):
        if re.search("\.template\.xml", file) and not re.search("impulses\.", file):
            try:
                ff = open(result_path + "/" + file.replace(".template", ""), 'r')
                ff.close()
                include = doc.createElement("include")
                include.setAttribute("url", file.replace(".template", ""))
                includes = dom.getElementsByTagName("include")
                isAppend = True
                for inc in includes:
                    if inc.getAttribute("url") == file.replace(".template", ""):
                        isAppend = False
                if isAppend:
                    dom.appendChild(include)
                    newStr = doc.createTextNode("\n")
                    dom.appendChild(newStr)
                    print "Added node " + str(include) + " to storyall.xml"
                else:
                    print "storyall.xml already contains node <include url='" + file.replace(".template", "") + "' />"
            except:
                continue
    file = open(xmlFile, 'w')
    Print(doc, file, "utf-8")
    print "Wrote file ", str(xmlFile)
def appendTsXml(self, tsXmlFile):
    os.system("cp " + tsXmlFile + " " + tsXmlFile + ".old")
    doc = FromXmlFile(tsXmlFile)
    dom = doc.getElementsByTagName("g").item(0)
    for file in os.listdir(tsXmlFile.replace("/ts.xml", "")):
        if re.search("impulses\.", file) and re.search("\.xml", file) and not re.search("\.old", file):
            include = doc.createElement("include")
            include.setAttribute("url", file)
            includes = dom.getElementsByTagName("include")
            isAppend = True
            for inc in includes:
                if inc.getAttribute("url") == file:
                    isAppend = False
            if isAppend:
                dom.appendChild(include)
                newStr = doc.createTextNode("\n")
                dom.appendChild(newStr)
                print _("Added node " + str(include) + " to ts.xml")
            else:
                print _("ts.xml already contains node <include url='" + file + "' />")
    file = open(tsXmlFile, 'w')
    Print(doc, file, "utf-8")
    print "Wrote file ", str(tsXmlFile)
def __init__(self, parent, dom):
    super(Request, self).__init__(parent, dom)
    self.type = dom.getAttribute('type')
    self.args = build_list(self, Arg, dom.getElementsByTagName('arg'))
def include(self):
    # Record these files in storyall.xml
    try:
        doc = FromXmlFile("storyall.xml")
        dom = doc.documentElement
        inc = doc.createElement("include")
        inc.setAttribute("url", self.toFile)
        isIncAlready = False
        incs = dom.getElementsByTagName("include")
        for i in incs:
            if i.getAttribute("url") == inc.getAttribute("url"):
                isIncAlready = True
                break
        if not isIncAlready:
            dom.appendChild(inc)
            dom.appendChild(doc.createTextNode("\n"))
            f = open("storyall.xml", 'w')
            Print(doc, f, "utf-8")
    except Exception, msg:
        print "Strelka-Canceler::include(): EXCEPTION (while writing to 'storyall.xml'):", msg
def getDomains(targets, release):
    import urllib
    from xml.dom.minidom import parse
    import xml.dom
    import pickle
    pfamDict = {}
    # Loop through all targets and get pfam domains.
    errors = []
    for target in targets:
        pfamDict[target] = {}
        pfamDict[target]["domains"] = []
        pfamDict[target]["start"] = []
        pfamDict[target]["end"] = []
        opener = urllib.FancyURLopener({})
        f = opener.open("http://pfam.sanger.ac.uk/protein/%s?output=xml" % target)
        dom = parse(f)
        if not dom.getElementsByTagName('sequence'):
            errors.append(target)
            del pfamDict[target]
            continue
        for pfam in dom.childNodes:
            if pfam.nodeName == 'pfam':
                for entry in pfam.childNodes:
                    if entry.nodeName == 'entry':
                        for matches in entry.childNodes:
                            if matches.nodeName == 'matches':
                                for match in matches.childNodes:
                                    if match.nodeName == 'match':
                                        if match.getAttribute('type') == 'Pfam-A':
                                            pfamDict[target]['domains'].append(match.getAttribute('id'))
                                            for location in match.childNodes:
                                                if location.nodeName == 'location':
                                                    start = location.getAttribute('start')
                                                    end = location.getAttribute('end')
                                                    pfamDict[target]['start'].append(int(start))
                                                    pfamDict[target]['end'].append(int(end))
        dom.unlink()
        # Add domain count.
        pfamDict[target]['count'] = len(pfamDict[target]['domains'])
        # Calculate and add the unique count of domains.
        uniqDomains = {}
        for domain in pfamDict[target]['domains']:
            uniqDomains[domain] = 0
        pfamDict[target]['countUnique'] = len(uniqDomains)
    # Pickle the pfamDict
    output = open('data/protCodPfamDict_%s.pkl' % release, 'w')
    pickle.dump(pfamDict, output)
    print "encountered Error for", errors
    return pfamDict
def getNamesArray(self, resultDoc):
    existedNames = []
    dom = resultDoc.getElementsByTagName("root").item(0)
    objs = dom.getElementsByTagName("obj")
    for obj in objs:
        if obj.hasAttribute("name"):
            existedNames.append(obj.getAttribute("name"))
    return existedNames
def test_names(testdir):
    """Assert test names are informative about what file was tested"""
    file = testdir.makepyfile('def hello():\n print("Hello, world!")')
    file.write(data="\n", mode="a")
    testdir.runpytest("--black", "--junit-xml=test-output.xml")
    dom = parse((testdir.tmpdir / "test-output.xml").open())
    test_case = dom.getElementsByTagName("testcase")[0]
    assert "test_names.py" in test_case.getAttribute("name")
def test_0060_RequirementTags(self):
    """Check that requirements have the correct tags"""
    nodes = dom.getElementsByTagName('requirement')
    self.assertEqual(len(nodes), 17)
    nodenames = ['title', 'priority', 'status', 'version', 'complexity',
                 'assigned', 'effort', 'category', 'description', 'origin',
                 'rationale', 'relatedfeatures', 'relatedrequirements',
                 'relatedusecases', 'relatedtestcases', 'changelist',
                 'taglist']
    for node in nodes[0].childNodes:
        if node.nodeType == node.TEXT_NODE:
            continue
        self.failUnless(node.nodeName in nodenames,
                        'invalid node <%s>' % node.nodeName)
        nodenames.remove(node.nodeName)
    self.failUnlessEqual(len(nodenames), 0,
                         'missing nodes %s' % str(nodenames))
def _get_dom_nodes_values_by_tag(dom, tag):
    result = []
    nodes = dom.getElementsByTagName(tag)
    for node in nodes:
        value = MSBuildXmlProject._get_dom_node_value(node)
        if value:
            result.append(value)
    return result
def test_0050_FeatureTags(self):
    """Check that features have the correct tags"""
    nodes = dom.getElementsByTagName('feature')
    self.assertEqual(len(nodes), 5)
    nodenames = ['title', 'priority', 'status', 'version', 'risk',
                 'description', 'relatedrequirements', 'relatedusecases',
                 'changelist', 'taglist']
    for node in nodes[0].childNodes:
        if node.nodeType == node.TEXT_NODE:
            continue
        self.failUnless(node.nodeName in nodenames,
                        'invalid node %s' % node.nodeName)
        nodenames.remove(node.nodeName)
    self.failUnlessEqual(len(nodenames), 0, str(nodenames))
def getSourceCode(source):
    nodelist = dom.getElementsByTagName('Record')
    collect = dom.getElementsByTagName('Collection')
    for colt in collect:
        if colt.parentNode.getAttribute('Identifier') == 'V0S0P1':
            if colt.getAttribute('Type') == 'CustomProperty':
                sqlrecord = [record.getElementsByTagName('Property')
                             for record in colt.getElementsByTagName('SubRecord')
                             if record.getElementsByTagName('Property')[0].firstChild.data == 'USERSQL']
                print(sqlrecord)
                prop = colt.getElementsByTagName('Property')
                for p in prop:
                    proptxt = prop[prop.index(p) - 1].firstChild
                    if proptxt.nodeType == xml.dom.Node.TEXT_NODE:
                        if proptxt.data == 'USERSQL':
                            print(p.firstChild.data)
            if colt.getAttribute('Type') == 'OutputColumn':
                columns = [x.firstChild.data for x in colt.getElementsByTagName('Property')
                           if x.getAttribute('Name') == 'Name']
                print(columns)
def _check_bug_patterns(report, patterns):
    try:
        dom = xml.dom.minidom.parseString(patterns)
    except ExpatError:
        return None
    for pattern in dom.getElementsByTagName('pattern'):
        url = _check_bug_pattern(report, pattern)
        if url:
            return url
    return None
def parse_and_store_data(response, start_date):
    """Parse XML data from San Francisco's Open311 endpoint and store it in
    a postgres database."""
    import xml.dom
    reqs = []
    # Lookup table: use a set since we don't need to associate the
    # attributes with values. May want to add an 'updated' flag.
    relevant_attrs = {'service_request_id', 'status', 'service_name',
                      'service_code', 'description', 'requested_datetime',
                      'updated_datetime', 'expected_datetime', 'address',
                      'zipcode', 'lat', 'long'}
    try:
        print 'response'
        dom = minidom.parse(response)
    except xml.parsers.expat.ExpatError:
        print 'Expat error'
        append_log('err_log.txt',
                   'ExpatError. Start date: ' + days[start_date.weekday()] +
                   ', ' + start_date.strftime('%Y-%m-%d'))
        return
    for node in dom.getElementsByTagName('request'):
        req_obj = {}
        for attr in node.childNodes:
            if attr.nodeType != xml.dom.Node.ELEMENT_NODE:
                continue
            if attr.childNodes:
                if attr.tagName in relevant_attrs:
                    # http://wiki.postgresql.org/wiki/Introduction_to_VACUUM,_ANALYZE,_EXPLAIN,_and_COUNT
                    # Don't insert null value?
                    req_obj[attr.tagName] = attr.childNodes[0].data or None  # will this work?
        # Check if you have a complete set of data for the request
        for relevant_attr in relevant_attrs:
            if relevant_attr not in req_obj:
                # To insert null values, either omit the field from the
                # insert statement or use None
                req_obj[relevant_attr] = None
        # Rename the long attribute
        req_obj['lon'] = req_obj['long']
        del req_obj['long']
        if req_obj['zipcode']:
            if not is_number(req_obj['zipcode']):
                req_obj['zipcode'] = None
        if float(req_obj['lat']) > 35 and float(req_obj['lon']) < -121:
            reqs.append(req_obj)
    append_log('log.txt',
               str(len(reqs)) + ' requests, start date: ' +
               start_date.isoformat() + ', ' +
               str(datetime.datetime.utcnow()) + '\n')
    update_database(reqs)
def _ParseDatetimeIntoSecs(dom, tag):
    """Returns the tag body parsed into seconds-since-epoch."""
    el = dom.getElementsByTagName(tag)
    if not el:
        return None
    assert el[0].getAttribute('type') == 'datetime'
    data = el[0].firstChild.data
    # Tracker emits datetime strings in UTC or GMT.
    # The [:-4] strips the timezone indicator.
    when = time.strptime(data[:-4], '%Y/%m/%d %H:%M:%S')
    # calendar.timegm treats the tuple as GMT
    return calendar.timegm(when)
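# A small, hedged usage sketch for _ParseDatetimeIntoSecs above. The tag
# name and document are hypothetical; the '2011/03/15 12:00:00 UTC' body
# format is inferred from the strptime pattern and the UTC/GMT comment.
import calendar
import time
import xml.dom.minidom

_doc = xml.dom.minidom.parseString(
    '<story><created_at type="datetime">'
    '2011/03/15 12:00:00 UTC</created_at></story>')
print _ParseDatetimeIntoSecs(_doc, 'created_at')  # 1300190400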
def __init__(self, doc):
    """Reads the tree structure of the XML document.

    doc : a string that contains an xml document
    """
    self.doc = doc
    self.modules = []
    dom = parseString(doc)
    root = dom.getElementsByTagName("project")[0]
    for rootMod in root.childNodes:
        r = ModuleFactory.makeModule(rootMod)
        if isinstance(r, Module):
            self.modules.append(r)
def xml_object_extract(xml_file, image_file, save_path, object_classes=None,
                       min_size_sum=100, w_h_limits=(10, 0.1)):
    """
    Extract specified objects from xml_file.

    Args:
        xml_file: str, xml path
        image_file: str, image path
        save_path: str, directory to save crops in
        object_classes: List or None, object classes to extract; None means all classes
        min_size_sum: min sum of width and height of object
        w_h_limits: width/height ratio limits (max, min)
    """
    def get_object_from_node(obj_element):
        name = obj_element.getElementsByTagName("name")[0].firstChild.data
        xmin = obj_element.getElementsByTagName("xmin")[0].firstChild.data
        ymin = obj_element.getElementsByTagName("ymin")[0].firstChild.data
        xmax = obj_element.getElementsByTagName("xmax")[0].firstChild.data
        ymax = obj_element.getElementsByTagName("ymax")[0].firstChild.data
        return name, int(xmin), int(ymin), int(xmax), int(ymax)

    dom_tree = xml.dom.minidom.parse(xml_file)
    dom = dom_tree.documentElement
    objects = dom.getElementsByTagName("object")
    img = read_with_rgb(image_file)
    for obj in objects:
        name, xmin, ymin, xmax, ymax = get_object_from_node(obj)
        w, h = xmax - xmin, ymax - ymin
        if object_classes:
            if name not in object_classes:
                continue
        if (xmax + ymax - xmin - ymin) <= min_size_sum:
            continue
        try:
            w_h = w / h
            if w_h > w_h_limits[0] or w_h < w_h_limits[1]:
                continue
            img_crop = img.crop((xmin, ymin, xmax, ymax))
            save_dir = save_path + "/" + name
            os.makedirs(save_dir, exist_ok=True)
            crop_file = os.path.join(save_dir, os.path.basename(image_file))
            img_crop.save(crop_file)
        except ZeroDivisionError:
            logging.warning(
                "ZeroDivisionError: Object width or height is zero, "
                "please correct your annotation")
def from_xml(cls, src):
    """Generate instance of subclass from an XML string."""
    dom = xml.dom.minidom.parseString(src)
    # Make sure all of the necessary pieces are there. Fail if any
    # required tags are missing.
    xc = cls()
    for tag_name, default_or_type in xc.required_tags():
        elem = dom.getElementsByTagName(tag_name)
        if not elem:
            raise ValueError("Missing XML tag: " + tag_name)
        tag_type = (default_or_type if isinstance(default_or_type, type)
                    else type(default_or_type))
        xc[tag_name] = tag_type(cls._get_text(elem[0].childNodes))
    return xc
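# Hedged usage sketch for from_xml above. It assumes from_xml is declared as
# a @classmethod on a dict-like base class (here called XmlConfigBase, a
# hypothetical name) whose subclasses define required_tags() as
# (tag name, default-or-type) pairs; JobConfig and its tags are illustrative.
class JobConfig(XmlConfigBase):
    def required_tags(self):
        return [('name', str), ('retries', 0)]

cfg = JobConfig.from_xml(
    '<job><name>nightly</name><retries>3</retries></job>')
# cfg['name'] == 'nightly'; cfg['retries'] == 3, coerced to int because the
# default value 0 supplies the type.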
def allAntProjectFiles():
    antProjectFiles = []
    for root, dirs, files in os.walk('.'):
        for filename in glob.glob(root + '/build.xml'):
            if len(filename) < 40 and filename.find('test.xml') == -1:
                try:
                    dom = parse(filename)
                    project = dom.getElementsByTagName("project")[0]
                    if project is not None:
                        antProjectFiles.append(filename)
                except:
                    continue
    return antProjectFiles
def parse_meta(self, dom):
    ret = {}
    for metaset in dom.getElementsByTagName("office:meta"):
        for child in metaset.childNodes:
            if child.nodeType == xml.dom.Node.ELEMENT_NODE:
                if child.tagName[:3] == "dc:":
                    ret[child.tagName[3:]] = self.get_node_text(child)
                elif child.tagName == "meta:user-defined":
                    ret[child.getAttribute("meta:name")] = self.get_node_text(child)
                elif child.tagName == "meta:document-statistic":
                    # ignore
                    pass
                elif child.tagName[:5] == "meta:":
                    ret[child.tagName[5:]] = self.get_node_text(child)
    return ret
def main():
    iFile = urllib2.urlopen(URL)
    contents = iFile.read()
    iFile.close()
    dom = xml.dom.minidom.parseString(contents)
    items = dom.getElementsByTagName("item")
    for item in items:
        title = getText(item, "title").strip()
        link = getText(item, "link").strip()
        key = getText(item, "key").strip()
        # strip up to two leading "[...]" prefixes from the title
        title = re.sub(r'^\[.*?\]', "", title, 1).strip()
        title = re.sub(r'^\[.*?\]', "", title, 1).strip()
        print '<li><a href="%s">%s</a> - %s' % (link, key, title)
def parse(self, fname):
    """Parse a GraphML file into a Graph."""
    g = None
    with open(fname, 'r') as f:
        dom = minidom.parse(f)
        root = dom.getElementsByTagName("graphml")[0]
        graph = root.getElementsByTagName("graph")[0]
        name = graph.getAttribute('id')
        g = Graph(name)
        # Get nodes
        for node in graph.getElementsByTagName("node"):
            n = g.add_node(id=node.getAttribute('id'))
            for child in node.childNodes:
                if child.nodeType == xml.dom.Node.ELEMENT_NODE and child.tagName == "data":
                    if child.firstChild:
                        n[child.getAttribute("key")] = child.firstChild.data
                    else:
                        n[child.getAttribute("key")] = ""
        # Get edges
        for edge in graph.getElementsByTagName("edge"):
            source = edge.getAttribute('source')
            dest = edge.getAttribute('target')
            # source/target attributes refer to IDs:
            # http://graphml.graphdrawing.org/xmlns/1.1/graphml-structure.xsd
            e = g.add_edge_by_id(source, dest)
            for child in edge.childNodes:
                if child.nodeType == xml.dom.Node.ELEMENT_NODE and child.tagName == "data":
                    if child.firstChild:
                        e[child.getAttribute("key")] = child.firstChild.data
                    else:
                        e[child.getAttribute("key")] = ""
    return g
def _GetDataFromTag(dom, tag):
    """Retrieve value associated with the tag, if any.

    Args:
        dom: XML DOM object
        tag: name of the desired tag

    Returns:
        None (if tag doesn't exist), empty string (if tag exists, but body
        is empty), or the tag body.
    """
    tags = dom.getElementsByTagName(tag)
    if not tags:
        return None
    elif tags[0].hasChildNodes():
        return tags[0].firstChild.data
    else:
        return ''
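# Small usage sketch for _GetDataFromTag above, showing its three outcomes
# (the document and tag names are illustrative, not from the source):
import xml.dom.minidom

_d = xml.dom.minidom.parseString('<story><name>Fix bug</name><desc/></story>')
print _GetDataFromTag(_d, 'name')   # u'Fix bug'
print _GetDataFromTag(_d, 'desc')   # '' (tag present, empty body)
print _GetDataFromTag(_d, 'owner')  # None (tag missing)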
def _ReadUserXML(self, dom):
    """Read in a single <user> element.

    Args:
        dom: the DOM tree for a <user> element

    Returns:
        (dn, user) where user is a dictionary whose keys are the element
        names and whose values are the text values of the elements, if any.
    """
    user = {}
    dn = GetTextFromNodeList(dom.getElementsByTagName("DN"))
    if not dn:
        return (None, None)
    for child in dom.childNodes:
        if child.nodeType != xml.dom.Node.ELEMENT_NODE:
            continue
        # the DN is special; don't include that
        if child.tagName != "DN":
            self._SaveElement(child, user)
    return (dn, user)
def readXML(self, filename):
    dom = xml.dom.minidom.parse(filename)
    msh = dom.getElementsByTagName('mesh')
    if len(msh) < 1:
        sys.exit(" ERROR: No element with tag name 'mesh' found!")
    self.dim = int(msh[0].getAttribute('dim'))
    h = float(msh[0].getElementsByTagName('h')[0].childNodes[0].data.strip())
    for p in msh[0].getElementsByTagName('patch'):
        x = [float(z) for z in p.childNodes[0].data.strip().split(' ')]
        self.add(Cuboid(self.dim, x[0:3], x[3:6]))
    dom.unlink()
    self.createMesh(h)
def parse_dom(dom):
    """Parse dom into a Graph.

    :param dom: dom as returned by minidom.parse or minidom.parseString
    :return: A Graph representation
    """
    root = dom.getElementsByTagName("graphml")[0]
    graph = root.getElementsByTagName("graph")[0]
    name = graph.getAttribute('id')
    g = Graph(name)
    # Get nodes
    for node in graph.getElementsByTagName("node"):
        n = g.add_node(id=node.getAttribute('id'))
        for attr in node.getElementsByTagName("data"):
            if attr.firstChild:
                n[attr.getAttribute("key")] = attr.firstChild.data
            else:
                n[attr.getAttribute("key")] = ""
    # Get edges
    for edge in graph.getElementsByTagName("edge"):
        source = edge.getAttribute('source')
        dest = edge.getAttribute('target')
        # source/target attributes refer to IDs:
        # http://graphml.graphdrawing.org/xmlns/1.1/graphml-structure.xsd
        e = g.add_edge_by_id(source, dest)
        for attr in edge.getElementsByTagName("data"):
            if attr.firstChild:
                e[attr.getAttribute("key")] = attr.firstChild.data
            else:
                e[attr.getAttribute("key")] = ""
    return g
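# Hedged usage sketch for parse_dom above; the GraphML fragment is minimal
# and Graph/add_node/add_edge_by_id are assumed to come from the surrounding
# module.
from xml.dom import minidom

_gml = minidom.parseString(
    '<graphml><graph id="g0">'
    '<node id="a"/><node id="b"/>'
    '<edge source="a" target="b"/>'
    '</graph></graphml>')
g = parse_dom(_gml)  # a Graph named "g0" with nodes a, b and one edge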
def checkSentence(essaySentence):
    """Check the sentence for usage errors using LanguageTool."""
    logging.debug("checkSentence start.")
    essaySentence.ltCheckResults = []
    params = (('language', 'en'), ('text', essaySentence.sentence))
    req = 'http://localhost:8081/?' + urllib.urlencode(params)
    try:
        rep = urllib2.urlopen(req)
    except:
        logging.error("checkSentence error")
        return
    repstr = rep.read()
    dom = parseString(repstr)
    errors = dom.getElementsByTagName('error')
    for error in errors:
        checkResult = {}
        checkResult['fromy'] = error.attributes["fromy"].nodeValue
        checkResult['fromx'] = error.attributes["fromx"].nodeValue
        checkResult['toy'] = error.attributes["toy"].nodeValue
        checkResult['tox'] = error.attributes["tox"].nodeValue
        checkResult['ruleId'] = error.attributes["ruleId"].nodeValue
        checkResult['msg'] = error.attributes["msg"].nodeValue
        checkResult['replacements'] = error.attributes["replacements"].nodeValue
        checkResult['context'] = error.attributes["context"].nodeValue
        checkResult['contextoffset'] = error.attributes["contextoffset"].nodeValue
        checkResult['errorlength'] = error.attributes["errorlength"].nodeValue
        offset = int(checkResult['contextoffset'])
        length = int(checkResult['errorlength'])
        checkResult['errorbefore'] = checkResult['context'][:offset]
        checkResult['errorme'] = checkResult['context'][offset:offset + length]
        checkResult['errorafter'] = checkResult['context'][offset + length:]
        if not checkResult['ruleId'] in IgnoredLtRuleIds:
            essaySentence.ltCheckResults.append(checkResult)
    logging.debug("checkSentence end.")
def loadOptions(self, filename, option):
    datei = open(filename, "r")
    dom = xml.dom.minidom.parse(datei)
    datei.close()
    option.files = []
    for elem in dom.getElementsByTagName('Options'):
        for elem1 in elem.getElementsByTagName('ActivateEmail'):
            option.email = self.liesText(elem1)
        for passwordFile in elem.getElementsByTagName('passwordfile'):
            filename = passwordFile.getAttribute('filename')
            encodeId = self.getList(passwordFile.getAttribute('encodeid'))
            encodeId.insert(0, option.getEmail())
            isDefault = self.getBoolean(passwordFile.getAttribute('isdefault'))
            needBackup = self.getBoolean(passwordFile.getAttribute('needbackup'))
            passwordFileOption = PasswordFileOption(filename, encodeId,
                                                    isDefault=isDefault,
                                                    needBackup=needBackup)
            option.files.append(passwordFileOption)
        self.readGuiOption(elem, option.gui)
    self.updateDefaultValues(option)
    self.controlEmailOld(option)
def fromXML(self, xmlstr):
    hede = 0
    if type(xmlstr) is str:
        dom = xml.dom.minidom.parseString(xmlstr)
    else:
        dom = xmlstr
        hede = 1
    tn = dom.getElementsByTagName("acl")
    if not tn:
        return
    acl = tn[0]
    if acl.getElementsByTagName("standalone"):
        self.standalone = 1
        print "standalone"
    for rule in acl.getElementsByTagName("rule"):
        r = ACLRule()
        if rule.getElementsByTagName("quick"):
            r.quick = 1
        tn = rule.getElementsByTagName("policy")[0]
        if tn:
            t = tn.firstChild.data
            if t == "Read only":
                r.policy = 1
            elif t == "Allow":
                r.policy = 2
        if rule.getElementsByTagName("not"):
            r.inverse = 1
        tn = rule.getElementsByTagName("chain")[0]
        if tn:
            t = tn.firstChild.data
            try:
                r.chain = acl_chains[t]
            except:
                pass
        tn = rule.getElementsByTagName("value")[0]
        if tn:
            r.value = tn.firstChild.data[:]
        self.rules.append(r)
    if hede != 1:
        dom.unlink()
class HostedTrackerAuth(TrackerAuth):
    """Authentication rules for hosted Tracker instances."""

    def EstablishAuthToken(self, opener):
        """Returns the first auth token returned by /services/tokens/active."""
        url = 'https://www.pivotaltracker.com/services/tokens/active'
        data = urllib.urlencode((('username', self.username),
                                 ('password', self.password)))
        try:
            req = opener.open(url, data)
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise NoTokensAvailableException(
                    'Did you create any? Check https://www.pivotaltracker.com/profile')
            else:
                raise
        res = req.read()
        dom = minidom.parseString(res)
        token = dom.getElementsByTagName('guid')[0].firstChild.data
        return token
def Load(cls, XmlFilePath, Logger=None):
    Info = TilesetInfo()
    try:
        dom = xml.dom.minidom.parse(XmlFilePath)
        levels = dom.getElementsByTagName("Level")
        level = levels[0]
        Info.GridDimX = int(level.getAttribute('GridDimX'))
        Info.GridDimY = int(level.getAttribute('GridDimY'))
        Info.TileDimX = int(level.getAttribute('TileXDim'))
        Info.TileDimY = int(level.getAttribute('TileYDim'))
        Info.FilePrefix = level.getAttribute('FilePrefix')
        Info.FilePostfix = level.getAttribute('FilePostfix')
        Info.Downsample = float(level.getAttribute('Downsample'))
    except Exception as e:
        Logger.warning("Failed to parse XML File: " + XmlFilePath)
        Logger.warning(str(e))
        return
    return Info
for infile in glob.glob(os.path.join(path, "*.txt")):
    print("current file is: " + infile)

dirList = os.listdir(path)
c = {}  # creating empty dictionaries
l = {}
words = []
for fname in dirList:
    myInput = open(path + '\\' + fname, 'r').read()
    myInput = myInput.strip()  # stripping of white spaces
    dom = parse(path + '\\' + fname)  # parse a file by name
    xmlTag = dom.getElementsByTagName('DOCNO')[0].firstChild.nodeValue.strip()
    xmlTag1 = dom.getElementsByTagName('TEXT')[0].firstChild.nodeValue.strip()
    xmlTag2 = dom.getElementsByTagName('TITLE')[0].firstChild.nodeValue.strip()
    c[xmlTag] = xmlTag2
    l[xmlTag] = xmlTag1
    words = words + xmlTag1.split(' ')
    f = open(path1 + '\\' + xmlTag, 'w')
    content = str(xmlTag1)
    f.write(content)
    f.close()

# pickling of data, or serializing it to a file; it is unpickled
# in the other file - query.py
outpath = open(r'C:\Users\Romi\Desktop\picklef.txt', 'wb')
pickle.dump(c, outpath)
outpath.close()
def _getUseTags(self, dom):
    return dom.getElementsByTagName('use')
def parseConfigFile(filename):
    global x_offset   # shapefile x offset relative to earth coords
    global y_offset   # shapefile y offset relative to earth coords
    global z_offset   # shapefile z offset relative to earth coords
    global x_scale    # shapefile x scale to get to proper earth coords
    global y_scale    # shapefile y scale to get to proper earth coords
    global z_scale    # shapefile z scale to get to proper earth coords
    global feattype   # field num for differentiating different styles
    global featname   # field num for differentiating different styles
    global stylelist

    # fill a dict with styles
    dom = minidom.parse(filename)
    styl = dom.getElementsByTagName("styles")
    for st in styl:
        for style in st.getElementsByTagName("Style"):
            id = style.getAttribute("id")
            stylelist[id] = []
            lstyles = style.getElementsByTagName("LineStyle")
            for l in lstyles:
                tmp = l.getElementsByTagName("color")
                color = getText(tmp[0].childNodes)
                tmp = l.getElementsByTagName("colorMode")
                colorMode = getText(tmp[0].childNodes)
                tmp = l.getElementsByTagName("width")
                width = getText(tmp[0].childNodes)
                stylelist[id].append(LineStyle(color, colorMode, width))
            pstyles = style.getElementsByTagName("PolyStyle")
            for l in pstyles:
                tmp = l.getElementsByTagName("color")
                color = getText(tmp[0].childNodes)
                tmp = l.getElementsByTagName("colorMode")
                colorMode = getText(tmp[0].childNodes)
                tmp = l.getElementsByTagName("fill")
                fill = getText(tmp[0].childNodes)
                tmp = l.getElementsByTagName("outline")
                outline = getText(tmp[0].childNodes)
                stylelist[id].append(PolyStyle(color, colorMode, fill, outline))
    # get the feat type and name
    feat = dom.getElementsByTagName("feattype")
    for f in feat:
        tmp = f.getElementsByTagName("feat_id")
        feattype = int(getText(tmp[0].childNodes))
        tmp = f.getElementsByTagName("feat_name")
        featname = getText(tmp[0].childNodes)
    # get the offsets and scales
    offsets = dom.getElementsByTagName("offset")
    for offset in offsets:
        tmp = offset.getElementsByTagName("x_offset")
        x_offset = float(getText(tmp[0].childNodes))
        tmp = offset.getElementsByTagName("y_offset")
        y_offset = float(getText(tmp[0].childNodes))
    scales = dom.getElementsByTagName("scale")
    for scale in scales:
        tmp = scale.getElementsByTagName("x_scale")
        x_scale = float(getText(tmp[0].childNodes))
        tmp = scale.getElementsByTagName("y_scale")
        y_scale = float(getText(tmp[0].childNodes))
    return
def _parse(self, path, model):
    """
    Parses a ChannelML channel and adds it to the given model.
    Returns the new :class:`myokit.Component`.
    """
    # Check model: get membrane potential variable
    vvar = model.label('membrane_potential')
    if vvar is None:
        raise ChannelMLError(
            'No variable labelled "membrane_potential" was found. This is'
            ' required when adding ChannelML channels to existing models.')
    # Parse XML
    path = os.path.abspath(os.path.expanduser(path))
    dom = xml.dom.minidom.parse(path)
    # Get channelml tag
    root = dom.getElementsByTagName('channelml')
    try:
        root = root[0]
    except IndexError:
        raise ChannelMLError(
            'Unknown root element in xml document. Expecting a tag of type'
            ' <channelml>.')
    # Extract meta data
    meta = self._rip_meta(root)
    # Get channeltype tag
    root = root.getElementsByTagName('channel_type')
    try:
        root = root[0]
    except IndexError:
        raise ChannelMLError(
            'No <channel_type> element found inside <channelml> element.'
            ' Import of <synapse_type> and <ion_concentration> is not'
            ' supported.')
    # Add channel component
    name = self._sanitise_name(root.getAttribute('name'))
    if name in model:
        name_root = name
        i = 2
        while name in model:
            name = name_root + '_' + str(i)
            i += 1
    component = model.add_component(name)
    # Add alias to membrane potential
    component.add_alias('v', vvar)
    # Add meta-data
    component.meta['desc'] = meta
    # Find current-voltage relation
    cvr = root.getElementsByTagName('current_voltage_relation')
    if len(cvr) < 1:
        raise ChannelMLError(
            'Channel model must contain a current voltage relation.')
    elif len(cvr) > 1:
        warnings.warn(
            'Multiple current voltage relations found, ignoring all but'
            ' first.')
    cvr = cvr[0]
    # Check for q10
    try:
        q10 = cvr.getElementsByTagName('q10_settings')[0]
        component.meta['experimental_temperature'] = str(
            q10.getAttribute('experimental_temp'))
    except IndexError:
        pass
    # Add reversal potential
    E = 0
    if cvr.hasAttribute('default_erev'):
        E = float(cvr.getAttribute('default_erev'))
    evar = component.add_variable('E')
    evar.meta['desc'] = 'Reversal potential'
    evar.set_rhs(E)
    # Get maximum conductance
    gmax = 1.0
    if cvr.hasAttribute('default_gmax'):
        gmax = float(cvr.getAttribute('default_gmax'))
    gmaxvar = component.add_variable('gmax')
    gmaxvar.set_rhs(gmax)
    gmaxvar.meta['desc'] = 'Maximum conductance'
    # Add gates
    gvars = []
    for gate in cvr.getElementsByTagName('gate'):
        gname = self._sanitise_name(gate.getAttribute('name'))
        gvar = component.add_variable(gname)
        gvar.promote(0)
        cstate = gate.getElementsByTagName('closed_state')
        cstate = cstate[0].getAttribute('id')
        ostate = gate.getElementsByTagName('open_state')
        ostate = ostate[0].getAttribute('id')
        # Transitions
        trans = gate.getElementsByTagName('transition')
        if len(trans) > 0:
            # Use "transitions" definition
            if len(trans) != 2:
                raise ChannelMLError(
                    'Expecting exactly 2 transitions for gate <'
                    + gname + '>.')
            # Get closed-to-open state
            tco = None
            for t in trans:
                if t.getAttribute('to') == ostate and \
                        t.getAttribute('from') == cstate:
                    tco = t
                    break
            if tco is None:
                raise ChannelMLError(
                    'Unable to find closed-to-open transition for gate <'
                    + gname + '>')
            # Get open-to-closed state
            toc = None
            for t in trans:
                if t.getAttribute('to') == cstate and \
                        t.getAttribute('from') == ostate:
                    toc = t
                    break
            if toc is None:
                raise ChannelMLError(
                    'Unable to find open-to-closed transition for gate <'
                    + gname + '>')
            # Add closed-to-open transition
            tname = self._sanitise_name(tco.getAttribute('name'))
            tcovar = gvar.add_variable(tname)
            expr = str(tco.getAttribute('expr'))
            try:
                tcovar.set_rhs(self._parse_expression(expr, tcovar))
            except myokit.ParseError as e:
                warnings.warn(
                    'Error parsing expression for closed-to-open'
                    ' transition in gate <' + gname + '>: '
                    + myokit.format_parse_error(e))
            tcovar.meta['expression'] = str(expr)
            # Add open-to-closed transition
            tname = self._sanitise_name(toc.getAttribute('name'))
            tocvar = gvar.add_variable(tname)
            expr = str(toc.getAttribute('expr'))
            try:
                tocvar.set_rhs(self._parse_expression(expr, tocvar))
            except myokit.ParseError as e:
                warnings.warn(
                    'Error parsing expression for open-to-closed'
                    ' transition in gate <' + gname + '>: '
                    + myokit.format_parse_error(e))
            tocvar.meta['expression'] = str(expr)
            # Write equation for gate
            gvar.set_rhs(
                Minus(
                    Multiply(Name(tcovar), Minus(Number(1), Name(gvar))),
                    Multiply(Name(tocvar), Name(gvar))))
        else:
            # Use "steady-state & time course" definition
            ss = gate.getElementsByTagName('steady_state')
            tc = gate.getElementsByTagName('time_course')
            if len(ss) < 1 or len(tc) < 1:
                raise ChannelMLError(
                    'Unable to find transitions or steady state and'
                    ' time course for gate <' + gname + '>.')
            ss = ss[0]
            tc = tc[0]
            # Add steady-state variable
            ssname = self._sanitise_name(ss.getAttribute('name'))
            ssvar = gvar.add_variable(ssname)
            expr = str(ss.getAttribute('expr'))
            try:
                ssvar.set_rhs(self._parse_expression(expr, ssvar))
            except myokit.ParseError as e:
                warnings.warn(
                    'Error parsing expression for steady state in gate <'
                    + gname + '>: ' + myokit.format_parse_error(e))
            ssvar.meta['expression'] = str(expr)
            # Add time course variable
            tcname = self._sanitise_name(tc.getAttribute('name'))
            tcvar = gvar.add_variable(tcname)
            expr = str(tc.getAttribute('expr'))
            try:
                tcvar.set_rhs(self._parse_expression(expr, tcvar))
            except myokit.ParseError as e:
                warnings.warn(
                    'Error parsing expression for time course in gate <'
                    + gname + '>: ' + myokit.format_parse_error(e))
            tcvar.meta['expression'] = str(expr)
            # Write expression for gate
            gvar.set_rhs(
                Divide(Minus(Name(ssvar), Name(gvar)), Name(tcvar)))
        power = int(gate.getAttribute('instances'))
        if power > 1:
            gvars.append(Power(Name(gvar), Number(power)))
        else:
            gvars.append(Name(gvar))
    if len(gvars) < 1:
        raise ChannelMLError(
            'Current voltage relation requires at least one gate.')
    # Add current variable
    ivar = component.add_variable('I')
    ivar.meta['desc'] = 'Current'
    expr = Name(gmaxvar)
    while gvars:
        expr = Multiply(expr, gvars.pop())
    expr = Multiply(expr, Minus(Name(vvar), Name(evar)))
    ivar.set_rhs(expr)
    # Done, return component
    return component
#!/usr/bin/python2
from xml.dom import minidom
import base64
import zlib
import sys

if len(sys.argv) != 3:
    print('Usage:', sys.argv[0], '<in> <out>')
    sys.exit(-1)

# parse XML document
dom = minidom.parse(sys.argv[1])

# pick it apart and do some basic validation
mapelt = dom.getElementsByTagName('map')[0]
if mapelt.getAttribute('orientation') != 'orthogonal':
    print('orientation must be orthogonal, not',
          mapelt.getAttribute('orientation'))
    sys.exit(-1)
height = int(mapelt.getAttribute('height'))
width = int(mapelt.getAttribute('width'))
if height < 1 or width < 1:
    print('map is too small:', width, 'x', height)
    sys.exit(-1)

# get tileset info
tileset = mapelt.getElementsByTagName('tileset')[0]
firstgid = int(tileset.getAttribute('firstgid'))

# get actual map data
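# The script above breaks off at "# get actual map data". A hedged sketch of
# what commonly follows for a TMX map, given the base64/zlib imports: TMX
# layer data is typically base64-encoded, zlib-compressed little-endian
# 32-bit gids. This is an assumption about intent, not the original code.
import struct

layer = mapelt.getElementsByTagName('layer')[0]
data_elt = layer.getElementsByTagName('data')[0]
raw = zlib.decompress(base64.b64decode(data_elt.firstChild.data.strip()))
gids = struct.unpack('<%dI' % (len(raw) // 4), raw)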
def __init__(self, resc):
    self.cover_id = None
    self.dom_metadata = None
    self.dom_spine = None
    self.metadata_array = None
    self.spine_array = None
    self.spine_skelid_dict = None
    self.spine_filename_dict = None
    if resc == None or len(resc) != 3:
        return
    [version, type_, data] = resc
    self.version = version
    self.type = type_
    self.data = data
    # It seems to be possible to handle utf-8 with the minidom module when
    # modifying an xml string in the RESC section as below.
    # However, it is not certain that this usage of minidom is proper.
    resc_xml = ''
    mo_xml = re.search(r'<\?xml[^>]*>', data, re.I)
    if mo_xml != None:
        resc_xml += mo_xml.group()
    else:
        resc_xml += '<?xml version="1.0" encoding="utf-8"?>'
    mo_package = re.search(r'(<package[^>]*>).*?(</package>)', data, re.I)
    if mo_package != None:
        resc_xml += mo_package.group(1)
    else:
        resc_xml += '<package version="2.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid">'
    mo_metadata = re.search(r'(<metadata[^>]*>).*?(</metadata>)', data, re.I)
    if mo_metadata != None:
        resc_xml += mo_metadata.group()
    mo_spine = re.search(r'(<spine[^>]*>).*?(</spine>)', data, re.I)
    if mo_spine != None:
        resc_xml += mo_spine.group()
    resc_xml += '</package>'
    dom = xml.dom.minidom.parseString(resc_xml)
    dom_metadata = dom.getElementsByTagName('metadata')
    if len(dom_metadata) > 0 and dom_metadata.item(0).hasChildNodes():
        metadata_array = []
        nodeList = dom_metadata.item(0).childNodes
        for i in range(nodeList.length):
            isvalid = True
            item = nodeList.item(i)
            if item.nodeType == xml.dom.Node.COMMENT_NODE:
                isvalid = False
            elif item.hasAttributes():
                if item.hasAttribute('refines'):
                    isvalid = False
                elif item.hasAttribute('name'):
                    name = item.getAttribute('name')
                    content = item.getAttribute('content').encode('utf-8')
                    if name.lower() == 'cover':
                        if len(content) > 0:
                            self.cover_id = content
            metadata_array.append([isvalid])
        self.dom_metadata = dom_metadata
        self.metadata_array = metadata_array
    dom_spine = dom.getElementsByTagName('spine')
    if len(dom_spine) > 0 and dom_spine.item(0).hasChildNodes():
        nodeList = dom_spine.item(0).childNodes
        spine_array = []
        for i in range(nodeList.length):
            item = nodeList.item(i)
            if item.nodeType == xml.dom.Node.COMMENT_NODE:
                continue
            elif item.hasAttributes():
                if item.hasAttribute('skelid'):
                    skelid = int(item.getAttribute('skelid'))
                    item.removeAttribute('skelid')
                else:
                    skelid = -1
                spine_array.append([False, skelid, None])
        self.dom_spine = dom_spine
        self.spine_array = spine_array
    self.createSkelidToSpineIndexDict()
def testGetElementsByTagName():
    dom = parse(tstfile)
    confirm(dom.getElementsByTagName("LI") ==
            dom.documentElement.getElementsByTagName("LI"))
    dom.unlink()
#!/usr/bin/python3
from xml.dom.minidom import parse, parseString
import xml.dom
import sys


def rec_parse(node, context):
    # node : xml.dom.Node
    for i in node.childNodes:
        if i.nodeType == xml.dom.Node.ELEMENT_NODE:
            if i.hasAttribute("comment"):
                print('QT_TRANSLATE_NOOP("' + context + '","'
                      + i.getAttribute("comment") + '");')
            rec_parse(i, context)


if len(sys.argv) != 2:
    print("usage: %s options.xml > output.cpp" % sys.argv[0])
    sys.exit(1)

print("#define QT_TRANSLATE_NOOP(a,b)")
dom = parse(sys.argv[1])  # parse an XML file by name
toplevel = dom.getElementsByTagName("psi")[0]
options = toplevel.getElementsByTagName("options")[0]
shortcuts = options.getElementsByTagName("shortcuts")[0]
rec_parse(shortcuts, "Shortcuts")