def search (self, terms): """ Search for a set of terms, returns a list of IDs to parse, which is then fed to self.fetch for data retrieval. """ import types, urllib from xml.dom import pulldom id_list = [] try: if isinstance(terms, types.ListType): url = self.esearch_url.replace('[[TERMS]]', urllib.quote_plus((' '.join([str[term] for term in terms])))) else: url = self.esearch_url.replace('[[TERMS]]', urllib.quote_plus(str(terms))) xmls = urllib.urlopen(url).read() events = pulldom.parseString(xmls) for event, node in events: if event == 'START_ELEMENT' \ and node.tagName == 'Id': events.expandNode(node) id = self._get_text(node) id_list.append(id) except Exception, e: self.logger.error('Unable to search Pubmed:', e) self.logger.error(traceback.format_stack()) return []
def extract(input_xml): """Process entire input XML document, firing on events""" # Start pulling; it continues automatically doc = pulldom.parseString(input_xml) output = '' for event, node in doc: # elements to ignore: xml if event == pulldom.START_ELEMENT and node.localName in ignore: continue # copy comments intact elif event == pulldom.COMMENT: doc.expandNode(node) output += node.toxml() # empty inline elements: pb, milestone elif event == pulldom.START_ELEMENT and node.localName in inlineEmpty: output += node.toxml() # non-empty inline elements: note, hi, head, l, lg, div, p, ab, elif event == pulldom.START_ELEMENT and node.localName in inlineContent: output += regexEmptyTag.sub('>', node.toxml()) elif event == pulldom.END_ELEMENT and node.localName in inlineContent: output += '</' + node.localName + '>' elif event == pulldom.START_ELEMENT and node.localName in blockElement: output += '\n<' + node.localName + '>\n' elif event == pulldom.END_ELEMENT and node.localName in blockElement: output += '\n</' + node.localName + '>' elif event == pulldom.CHARACTERS: output += normalizeSpace(node.data) else: continue return output
def openAPIparse(string, parser=None):
    """Parse *string* into a DOM Document (default) or, when *parser* is
    given, into a pulldom DOMEventStream driven by that SAX parser.

    BUG FIX: the original called pulldom.parseString() with no arguments
    (a TypeError) and silently ignored the supplied parser; both the
    string and the parser are now passed through.
    """
    if parser is None:
        from xml.dom import expatbuilder
        return expatbuilder.parseString(string)
    else:
        from xml.dom import pulldom
        return pulldom.parseString(string, parser)
def products_search_xml_api():
    """Search products by an XML payload: parse <search><query>…</query>
    and return up to 100 matching products as JSON."""
    parser = make_parser()
    # SECURITY NOTE(review): enabling external general entities makes this
    # parser XXE-vulnerable for untrusted request bodies — looks
    # intentional for a demo app; confirm before hardening.
    parser.setFeature(feature_external_ges, True)
    try:
        document = pulldom.parseString(request.data.decode(), parser=parser)
        str_xml = ''
        for event, node in document:
            if event == pulldom.START_ELEMENT:
                # expandNode() returns None, so 'exp' is never truthy;
                # only node.toxml() actually contributes output.
                exp = document.expandNode(node)
                if exp:
                    str_xml += exp
                str_xml += node.toxml()
        data = xmltodict.parse(str_xml)
        query = data.get('search').get('query')
    except (SAXException, ValueError) as e:
        return error_response(400, 'XML parse error - %s' % e)
    except Exception as e:
        return error_response(400, e)
    try:
        return jsonify([{
            'id': product.id,
            'name': product.name,
            'price': product.price,
            'description': product.description,
            'image': product.image,
            'stock': product.stock
        } for product in Product.query.filter(
            (Product.name.contains(query)) |
            (Product.description.contains(query))).limit(100).all()])
    except Exception as e:
        return error_response(400, 'Malformed Query %s' % query)
def cv_event_handler(event):
    """Handle an S3 upload event: read the uploaded XML CV from
    CV_BUCKET, serialize it, and store it base64-encoded in the CV_TBL
    DynamoDB item keyed by filename."""
    print("Received event for bucket: {}, key: {}".format(event.bucket, event.key))
    file_name = event.key
    s3 = boto3.resource('s3')
    try:
        sleep(2)  # presumably waits for the S3 object to be readable — TODO confirm
        email = get_user_for_event(file_name)
        if email:
            docfile = s3.Object(CV_BUCKET, file_name)
            docbody = docfile.get()['Body'].read()
            doc = parseString(docbody.decode('utf-8'))
            content = ''
            # 'content' ends up holding the serialization of the last
            # expanded node in the stream.
            for event, node in doc:
                doc.expandNode(node)
                content = node.toxml()
            try:
                print("content", content)
                cv_table = dynamo.Table(CV_TBL)
                response = cv_table.get_item(Key={'filename': file_name})
                item = response['Item']
                item['file_content'] = b64encode(content.encode()).decode('utf-8')
                cv_table.put_item(Item=item)
                app.log.debug(response)
            except Exception as e:
                # BUG FIX: Python 3 exceptions have no .message attribute,
                # so print(e.message) raised AttributeError here.
                print(e)
        else:
            raise Exception("Unable to find email")
    except Exception as e:
        print(e)
def parseXML(stream, parser=None):
    """Parse *stream* (a string or file-like object) into a tree of
    XMLNode objects, returning the document root."""
    if isinstance(stream, six.string_types):
        events = pulldom.parseString(stream, parser)
    else:
        events = pulldom.parse(stream, parser)
    document = None
    chain = []  # stack of open XMLNodes, root at chain[0]
    for event, node in events:
        if event == "START_DOCUMENT":
            chain.append(XMLNode("DOCUMENT", {}))
        elif event == "START_ELEMENT":
            node = XMLNode.fromDOMNode(node)
            if chain:
                chain[-1].children.append(node)
            chain.append(node)
        elif event == "END_ELEMENT":
            chain.pop(-1)
        elif event == "CHARACTERS":
            # text accumulates on the innermost open node
            chain[-1].data += node.data
        elif event == "END_DOCUMENT":
            document = chain.pop(-1)
    # fall back to the partially-built root if END_DOCUMENT never fired
    return document or chain[0]
def test_expandItem(self):
    """Ensure expandItem works as expected."""
    items = pulldom.parseString(SMALL_SAMPLE)
    # Advance to the first "title" start tag and expand it in place.
    for evt, item in items:
        if evt == pulldom.START_ELEMENT and item.tagName == 'title':
            items.expandNode(item)
            self.assertEqual(1, len(item.childNodes))
            break
    else:
        self.fail('No "title" element detected in SMALL_SAMPLE!')
    # The stream must resume cleanly after the expansion.
    for evt, node in items:
        if evt == pulldom.START_ELEMENT:
            break
    self.assertEqual('hr', node.tagName,
        'expandNode did not leave DOMEventStream in the correct state.')
    # Expanding a standalone (childless) element must also work.
    items.expandNode(node)
    self.assertEqual(next(items)[0], pulldom.CHARACTERS)
    evt, node = next(items)
    self.assertEqual(node.tagName, 'p')
    items.expandNode(node)
    next(items)  # skip character data
    evt, node = next(items)
    self.assertEqual(node.tagName, 'html')
    with self.assertRaises(StopIteration):
        next(items)
    items.clear()
    # clear() must drop the parser and stream references.
    self.assertIsNone(items.parser)
    self.assertIsNone(items.stream)
def test_expandItem(self):
    """Ensure expandItem works as expected."""
    items = pulldom.parseString(SMALL_SAMPLE)
    # Loop through the nodes until we get to a "title" start tag:
    for evt, item in items:
        if evt == pulldom.START_ELEMENT and item.tagName == "title":
            items.expandNode(item)
            self.assertEqual(1, len(item.childNodes))
            break
    else:
        self.fail("No \"title\" element detected in SMALL_SAMPLE!")
    # Loop until we get to the next start-element:
    for evt, node in items:
        if evt == pulldom.START_ELEMENT:
            break
    self.assertEqual("hr", node.tagName,
        "expandNode did not leave DOMEventStream in the correct state.")
    # Attempt to expand a standalone element:
    items.expandNode(node)
    self.assertEqual(next(items)[0], pulldom.CHARACTERS)
    evt, node = next(items)
    self.assertEqual(node.tagName, "p")
    items.expandNode(node)
    next(items)  # Skip character data
    evt, node = next(items)
    self.assertEqual(node.tagName, "html")
    with self.assertRaises(StopIteration):
        next(items)
    items.clear()
    # After clear() the stream must drop its parser/stream references.
    self.assertIsNone(items.parser)
    self.assertIsNone(items.stream)
def _flatten(line: str) -> str:
    """Clean and flatten one well-formed XML line of poetry.

    Text inside <stress> tags is uppercased, all other character data
    is lowercased, punctuation is stripped via _PUNC_RE, and the
    fragments are joined into a single string.

    Raises Exception when the line does not match _XML_RE.
    """
    if not _XML_RE.match(line):
        raise Exception(line, "is not tagged correctly")
    stressed = False   # inside a <stress> element?
    fragments = []     # accumulated output pieces
    for event, node in pulldom.parseString(line):
        if event == pulldom.START_ELEMENT and node.localName == 'stress':
            stressed = True
        elif event == pulldom.END_ELEMENT and node.localName == 'stress':
            stressed = False
        elif event == pulldom.CHARACTERS:
            fragments.append(node.data.upper() if stressed
                             else node.data.lower())
    return _PUNC_RE.sub("", "".join(fragments))
def xxe_pulldom():
    """Optionally feed the PullDOM XXE payload to the parser and render
    the result page (deliberate XXE demo endpoint)."""
    attack = request.form['attack']
    test_string = "<!DOCTYPE doc [ " \
                  "<!ENTITY pulldom SYSTEM \"file:///tmp/marker\"> " \
                  "<!ENTITY pulldom2 SYSTEM \"http://www.google.com/marker\"> " \
                  "]>\n" \
                  "<root>\n" \
                  "<element>&pulldom;</element>\n" \
                  "<element>&pulldom2;</element>\n" \
                  "</root>\n"
    result = ''
    if str(attack).lower() == 'true':
        pulldom.parseString(test_string)
        result = 'PullDOM XXE Attack Attempted'
    return render_template('xxe_pulldom.html', result=result)
def xxe():
    """Expand the <items> element of request-supplied XML and echo it."""
    doc = parseString(request.form['xxe'])
    for event, node in doc:
        if event == START_ELEMENT and node.localName == "items":
            doc.expandNode(node)
            nodes = node.toxml()
    # NOTE(review): if the payload contains no <items> element, 'nodes'
    # is never bound and the next line raises UnboundLocalError.
    return render_template("index.html", nodes=nodes)
def search(self, terms): """ Search for a set of terms, returns a list of IDs to parse, which is then fed to self.fetch for data retrieval. """ import types, urllib from xml.dom import pulldom id_list = [] try: if isinstance(terms, types.ListType): url = self.esearch_url.replace( '[[TERMS]]', urllib.quote_plus( (' '.join([str[term] for term in terms])))) else: url = self.esearch_url.replace('[[TERMS]]', urllib.quote_plus(str(terms))) xmls = urllib.urlopen(url).read() events = pulldom.parseString(xmls) for event, node in events: if event == 'START_ELEMENT' \ and node.tagName == 'Id': events.expandNode(node) id = self._get_text(node) id_list.append(id) except Exception, e: self.logger.error('Unable to search Pubmed:', e) self.logger.error(traceback.format_stack()) return []
def test_parse_semantics(self):
    """Test DOMEventStream parsing semantics."""
    items = pulldom.parseString(SMALL_SAMPLE)
    evt, node = next(items)
    # Just check the node is a Document:
    self.assertTrue(hasattr(node, "createElement"))
    self.assertEqual(pulldom.START_DOCUMENT, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual("html", node.tagName)
    self.assertEqual(2, len(node.attributes))
    self.assertEqual(
        node.attributes.getNamedItem("xmlns:xdc").value,
        "http://www.xml.com/books")
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)  # Line break
    evt, node = next(items)
    # XXX - A comment should be reported here!
    # self.assertEqual(pulldom.COMMENT, evt)
    # Line break after swallowed comment:
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual("title", node.tagName)
    title_node = node
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    self.assertEqual("Introduction to XSL", node.data)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)
    self.assertEqual("title", node.tagName)
    # END_ELEMENT must yield the same node object as START_ELEMENT:
    self.assertTrue(title_node is node)
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual("hr", node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)
    self.assertEqual("hr", node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual("p", node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual("xdc:author", node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)
    self.assertEqual("xdc:author", node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)  # </p>
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)  # </html>
def test_comment(self):
    """PullDOM does not receive "comment" events."""
    # NOTE: despite the docstring, this asserts a COMMENT event IS seen.
    items = pulldom.parseString(SMALL_SAMPLE)
    found_comment = False
    for evt, _ in items:
        if evt == pulldom.COMMENT:
            found_comment = True
            break
    if not found_comment:
        self.fail("No comment was encountered")
def test_parse_semantics(self):
    """Test DOMEventStream parsing semantics."""
    items = pulldom.parseString(SMALL_SAMPLE)
    evt, node = next(items)
    # Just check the node is a Document:
    self.assertTrue(hasattr(node, "createElement"))
    self.assertEqual(pulldom.START_DOCUMENT, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual("html", node.tagName)
    self.assertEqual(2, len(node.attributes))
    self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                     "http://www.xml.com/books")
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)  # Line break
    evt, node = next(items)
    # XXX - A comment should be reported here!
    # self.assertEqual(pulldom.COMMENT, evt)
    # Line break after swallowed comment:
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual("title", node.tagName)
    title_node = node
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    self.assertEqual("Introduction to XSL", node.data)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)
    self.assertEqual("title", node.tagName)
    # END_ELEMENT must yield the same node object as START_ELEMENT:
    self.assertTrue(title_node is node)
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual("hr", node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)
    self.assertEqual("hr", node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual("p", node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual("xdc:author", node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)
    self.assertEqual("xdc:author", node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)  # </p>
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)  # </html>
def fetchQuadrangle(dataset,yearMonth,resolution,sequence):
    """Fetch one quadrangle of data from the HTML service and return it
    as a dict, or None when the request fails or no table is found.
    (Python 2 code: print statement, urllib2, StringIO.)"""
    # Format a URI
    strYearMonth = "{}-{:02d}".format(yearMonth.year,yearMonth.month)
    url = serviceURI+dataset+"/"+strYearMonth+"/"+str(resolution)+"/"+str(sequence);
    print url
    # Open an HTTP Request
    response = None
    try:
        response = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        return None
    html = None
    # Unpack the response (gunzip when the server compressed it)
    if response.headers.get('content-encoding', '') == 'gzip':
        data = response.read()
        compressedstream = StringIO.StringIO(data)
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        html = gzipper.read()
    else:
        html = response.read()
    # Parse the markup with a namespace-aware SAX parser
    parser = sax.make_parser()
    parser.setFeature(sax.handler.feature_namespaces, 1)
    doc = pulldom.parseString(html,parser)
    inTable = False

    def textContent(parent):
        # Concatenate the .data of all child nodes of an expanded <td>.
        s = "";
        for n in parent.childNodes:
            if n.data != None:
                s += n.data
        return s

    # Process the markup as a stream and detect the table of data
    data = []
    for event, node in doc:
        if event == pulldom.START_ELEMENT and node.tagName == 'table':
            if node.getAttribute("typeof") == "IndexedTable":
                inTable = True
        if event == pulldom.END_ELEMENT and node.tagName == 'table':
            inTable = False
        if inTable and event == pulldom.START_ELEMENT and node.tagName == 'td':
            doc.expandNode(node)
            if len(node.childNodes) > 0:
                data.append(float(textContent(node)))
    if len(data) == 0:
        return None
    # Return the sequence number data object
    return {"dataset": dataset, "yearMonth": strYearMonth, "resolution" : resolution, "sequence": sequence, "data": data }
def xxe():
    """Expand the <items> element of request-supplied XML and echo it,
    rendering a bare page on parse failure or missing element."""
    doc = parseString(request.form['xxe'])
    try:
        for event, node in doc:
            if event == START_ELEMENT and node.localName == "items":
                doc.expandNode(node)
                nodes = node.toxml()
        # UnboundLocalError fires here when no <items> element was seen;
        # the except below deliberately treats that as "no output".
        return render_template("index.html", nodes=nodes)
    except (UnboundLocalError, xml.sax._exceptions.SAXParseException):
        return render_template("index.html")
def fetchQuadrangle(dataset, yearMonth, resolution, sequence):
    """Fetch one quadrangle of data from the HTML service and return it
    as a dict, or None when the request fails or no table is found.
    (Python 2 code: print statement, urllib2, StringIO.)"""
    url = serviceURI + dataset + "/" + yearMonth + "/" + str(
        resolution) + "/" + str(sequence)
    print url
    response = None
    try:
        response = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        return None
    html = None
    # Gunzip the body when the server compressed it.
    if response.headers.get('content-encoding', '') == 'gzip':
        data = response.read()
        compressedstream = StringIO.StringIO(data)
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        html = gzipper.read()
    else:
        html = response.read()
    # Namespace-aware SAX parser drives the pulldom stream.
    parser = sax.make_parser()
    parser.setFeature(sax.handler.feature_namespaces, 1)
    doc = pulldom.parseString(html, parser)
    inTable = False

    def textContent(parent):
        # Concatenate the .data of all child nodes of an expanded <td>.
        s = ""
        for n in parent.childNodes:
            if n.data != None:
                s += n.data
        return s

    # Collect floats from <td> cells inside the typeof="IndexedTable" table.
    data = []
    for event, node in doc:
        if event == pulldom.START_ELEMENT and node.tagName == 'table':
            if node.getAttribute("typeof") == "IndexedTable":
                inTable = True
        if event == pulldom.END_ELEMENT and node.tagName == 'table':
            inTable = False
        if inTable and event == pulldom.START_ELEMENT and node.tagName == 'td':
            doc.expandNode(node)
            if len(node.childNodes) > 0:
                data.append(float(textContent(node)))
    if len(data) == 0:
        return None
    return {
        "dataset": dataset,
        "yearMonth": yearMonth,
        "resolution": resolution,
        "sequence": sequence,
        "data": data
    }
def upload():
    """Render the upload form (GET) or parse an uploaded XML file and
    echo the last expanded node (POST)."""
    if request.method == 'GET':
        return render_template("index.html")
    elif request.method == 'POST':
        file = request.files.get('file')
        data = parseString(file.read())
        content = ''
        for event, node in data:
            # Every event's node is expanded; 'content' ends up holding
            # the serialization of the last node in the stream.
            data.expandNode(node)
            content = node.toxml()
        return render_template("index.html", data=content)
def XML_validator():
    """Expand the <customers> element of submitted XML and render it;
    any failure renders a generic validation-failed page."""
    doc = parseString(request.form['customers'])
    try:
        for event, node in doc:
            if event == START_ELEMENT and node.localName == "customers":
                doc.expandNode(node)
                nodes = node.toxml()
        return render_template("validator/index.html", nodes=nodes)
    except:
        # NOTE(review): bare except presumably covers both SAX parse
        # errors and 'nodes' being unbound — confirm before narrowing.
        return render_template("validator/index.html", error="Validation failed")
    # Unreachable: the try block always returns or raises.
    return render_template("validator/index.html", error="Validation failed")
def make_parser(stream_or_string):
    """Create a xml.dom.pulldom parser."""
    if not is_text(stream_or_string):
        return pulldom.parse(stream_or_string)
    # XXX: the pulldom.parseString() function doesn't seem to
    # like operating on unicode strings!
    return pulldom.parseString(str(stream_or_string))
def test_parse_semantics(self):
    """Test DOMEventStream parsing semantics."""
    items = pulldom.parseString(SMALL_SAMPLE)
    evt, node = next(items)
    # Just check the first node is a Document:
    self.assertTrue(hasattr(node, 'createElement'))
    self.assertEqual(pulldom.START_DOCUMENT, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual('html', node.tagName)
    self.assertEqual(2, len(node.attributes))
    self.assertEqual(node.attributes.getNamedItem('xmlns:xdc').value,
                     'http://www.xml.com/books')
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)  # line break
    evt, node = next(items)
    # line break after the swallowed comment (no COMMENT event emitted)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual('title', node.tagName)
    title_node = node
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    self.assertEqual('Introduction to XSL', node.data)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)
    self.assertEqual('title', node.tagName)
    # END_ELEMENT must yield the same node object as START_ELEMENT:
    self.assertTrue(title_node is node)
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual('hr', node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)
    self.assertEqual('hr', node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual('p', node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.START_ELEMENT, evt)
    self.assertEqual('xdc:author', node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)
    self.assertEqual('xdc:author', node.tagName)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)  # </p>
    evt, node = next(items)
    self.assertEqual(pulldom.CHARACTERS, evt)
    evt, node = next(items)
    self.assertEqual(pulldom.END_ELEMENT, evt)  # </html>
def test_end_document(self):
    """PullDOM does not receive "end-document" events."""
    # NOTE: despite the docstring, this asserts END_DOCUMENT IS delivered.
    items = pulldom.parseString(SMALL_SAMPLE)
    # Drain events up to and including the closing </html> tag.
    for evt, node in items:
        if evt == pulldom.END_ELEMENT and node.tagName == 'html':
            break
    try:
        evt, node = next(items)
    except StopIteration:
        self.fail(
            'Ran out of events, but should have received END_DOCUMENT')
    else:
        self.assertEqual(pulldom.END_DOCUMENT, evt)
def get_nodes_from_xml(src):
    """Yield each expanded <node> element found in *src*.

    src -- an XML string or a file-like object.
    Any parsing error is printed to stderr and ends the generator
    (best-effort behavior preserved from the original).
    """
    # IDIOM FIX: was `type(src)==str`, which is non-idiomatic and
    # rejects str subclasses; isinstance is the correct check.
    if isinstance(src, str):
        events = pulldom.parseString(src)
    else:
        # file like object
        events = pulldom.parse(src)
    try:
        for (event, node) in events:
            if event == pulldom.START_ELEMENT and node.tagName == "node":
                events.expandNode(node)
                yield node
    except Exception as e:
        print(e, file=sys.stderr)
def make_parser(stream_or_string):
    """Create a xml.dom.pulldom parser."""
    if not isinstance(stream_or_string, six.string_types):
        return pulldom.parse(stream_or_string)
    # XXX: the pulldom.parseString() function doesn't seem to
    # like operating on unicode strings!
    return pulldom.parseString(str(stream_or_string))
def test_end_document(self):
    """PullDOM does not receive "end-document" events."""
    # NOTE(review): despite the docstring, the body asserts that an
    # END_DOCUMENT event IS delivered after </html>.
    items = pulldom.parseString(SMALL_SAMPLE)
    # Read all of the nodes up to and including </html>:
    for evt, node in items:
        if evt == pulldom.END_ELEMENT and node.tagName == "html":
            break
    try:
        # Assert that the next node is END_DOCUMENT:
        evt, node = next(items)
        self.assertEqual(pulldom.END_DOCUMENT, evt)
    except StopIteration:
        self.fail(
            "Ran out of events, but should have received END_DOCUMENT")
def xxe_parse(request):
    """Parse the request body as XML (external entities ENABLED), pull the
    text of the first <text> element, and store it as the comment on the
    comments row with id=1, then render the lab page."""
    parser = make_parser()
    # SECURITY: external general entities are enabled on purpose — this
    # is a deliberately XXE-vulnerable lab endpoint.
    parser.setFeature(feature_external_ges, True)
    doc = parseString(request.body.decode('utf-8'), parser=parser)
    for event, node in doc:
        if event == START_ELEMENT and node.tagName == 'text':
            doc.expandNode(node)
            text = node.toxml()
            # Slice the inner text out of '<text>...</text>' by finding
            # the first '>' and the following '<'.
            startInd = text.find('>')
            endInd = text.find('<', startInd)
            text = text[startInd + 1:endInd:]
            p = comments.objects.filter(id=1).update(comment=text)
    return render(request, 'Lab/XXE/xxe_lab.html')
def injection(self):
    """XML injection lab endpoint: parse POSTed XML with external
    entities enabled and echo the expanded result."""
    if request.method == 'POST':
        # Check if data is not empty, post forms has all params defined
        # which may be empty and cause unexpected behaviour
        if request.form['input_data'] != '':
            # Instantiate an XML parser allowing unsafe external
            # sources to be parsed by xml.parseString (deliberate XXE)
            parser = make_parser()
            parser.setFeature(feature_external_ges, True)
            doc = parseString(request.form['input_data'], parser=parser)
            for event, node in doc:
                doc.expandNode(node)
                # returns on the first event (the expanded document)
                return (node.toxml())
        else:
            return redirect(request.url)
    return render_template('xml.html')
def __call__(self):
    """Aggregate service data into the public Cloud XML document and run
    it through the de-dup, merge and attribute-strip stylesheets."""
    self.readXML = self.aggregateServiceDataToXML()
    doc = parseString(self.readXML)
    finalXML = StringIO()
    finalXML.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
    finalXML.write(
        "<Cloud xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\""
        + XSD + "\">")
    headNodeXML = StringIO()
    workerNodeXML = StringIO()
    # Here the pulldom API is used to extract the XML nodes under any
    # "HeadNode" tags and write them to the finalXML for XSLT processing
    for event, node in doc:
        if event == xml.dom.pulldom.START_ELEMENT:
            if node.localName == "HeadNode":
                doc.expandNode(node)
                tempString = node.toxml()
                # The fancy string index [10:-11] is used to eliminate the
                # <HeadNode></HeadNode> tags from the output
                headNodeXML.write(tempString[10:-11])
            if node.localName == "Node":
                doc.expandNode(node)
                tempString = node.toxml()
                workerNodeXML.write(tempString)
    finalXML.write("<HeadNode>")
    # This tag is added for the "Optional Cloud Name" of the public XML
    # schema. An 'id' attribute MUST be specified or the XSLs will remove
    # this CloudName tag from the final XML. The 'id' is arbitrary.
    finalXML.write("<CloudName id='arbitrary11235813'>" + ConfigMapping[CLOUD_NAME] + "</CloudName>")
    finalXML.write(headNodeXML.getvalue())
    finalXML.write("</HeadNode>")
    finalXML.write("<WorkerNodes>")
    finalXML.write(workerNodeXML.getvalue())
    finalXML.write("</WorkerNodes>")
    finalXML.write("</Cloud>")
    # The various stylesheets are applied "serially" to the final XML to
    # prepare it for publishing
    return self.applyStyleSheet(
        ConfigMapping[NAGIOS_LOCATION] + ATTRIBUTE_STRIP_XSL,
        self.applyStyleSheet(
            ConfigMapping[NAGIOS_LOCATION] + MERGE_NODES_XSL,
            self.applyStyleSheet(
                ConfigMapping[NAGIOS_LOCATION] + REMOVE_DUP_XSL,
                finalXML.getvalue())))
def cv_event_handler(event):
    """Handle an S3 upload event: read the uploaded XML file, serialize
    it, and store it base64-encoded in the cv_data DynamoDB item."""
    print("Received event for bucket: {}, key: {}".format(
        event.bucket, event.key))
    file_name = event.key
    s3 = boto3.resource('s3')
    try:
        sleep(2)  # presumably waits for the S3 object to be readable — TODO confirm
        email = get_user_for_event(file_name)
        if email:
            docfile = s3.Object('training-cv-uploader', file_name)
            docbody = docfile.get()['Body'].read()
            doc = parseString(docbody)
            content = ''
            # 'content' ends up holding the last expanded node's XML.
            for event, node in doc:
                doc.expandNode(node)
                content = node.toxml()
            try:
                cv_table = dynamo.Table('cv_data')
                response = cv_table.get_item(Key={'filename': file_name})
                item = response['Item']
                # NOTE(review): b64encode(content) on a str only works on
                # Python 2; Python 3 requires bytes — confirm runtime.
                item['file_content'] = b64encode(content)
                cv_table.put_item(Item=item)
                app.log.debug(response)
            except Exception as e:
                # NOTE(review): e.message only exists on Python 2
                # exceptions; on Python 3 this raises AttributeError.
                print(e.message)
        else:
            raise Exception("Unable to find email")
    except Exception as e:
        print(e)
def _parse_response(self, content):
    """Extract the first (or last, if several) <bug> element from a
    Bugzilla XML response into a dict with alias/name/status/resolution.

    Raises IssueError when the <bug> carries an error attribute.
    """
    bugs = {}
    stream = pulldom.parseString(content)
    for event, node in stream:
        if event != "START_ELEMENT" or node.tagName != "bug":
            continue
        stream.expandNode(node)
        error = node.getAttribute("error")
        if error:
            raise IssueError(error)
        bugs['alias'] = node.getElementsByTagName("bug_id")[0].firstChild.data
        bugs['name'] = node.getElementsByTagName("short_desc")[0].firstChild.data
        bugs['status'] = node.getElementsByTagName("bug_status")[0].firstChild.data
        # An empty NodeList is falsy, so a missing <resolution> yields "".
        resolution_nodes = node.getElementsByTagName("resolution")
        bugs['resolution'] = resolution_nodes or ""
        if bugs['resolution']:
            bugs['resolution'] = bugs['resolution'][0].firstChild.data
    return bugs
def __process_event(self, eventdata):
    """
    Private method called while nmap process is running. It enables the
    library to handle specific data/events produced by nmap process.
    So far, the following events are supported:

    1. task progress: updates estimated time to completion and
    percentage done while scan is running. Could be used in combination
    with a callback function which could then handle this data while
    scan is running.
    2. nmap run: header of the scan. Usually displayed when nmap is
    started.
    3. finished: when nmap scan ends.

    :return: True is event is known.
    :todo: handle parsing directly via NmapParser.parse()
    """
    rval = False
    try:
        edomdoc = pulldom.parseString(eventdata)
        for xlmnt, xmlnode in edomdoc:
            if xlmnt is not None and xlmnt == pulldom.START_ELEMENT:
                # each branch also requires at least one attribute
                if (xmlnode.nodeName == 'taskprogress'
                        and xmlnode.attributes.keys()):
                    percent_done = xmlnode.attributes['percent'].value
                    etc_done = xmlnode.attributes['etc'].value
                    self.__progress = percent_done
                    self.__etc = etc_done
                    rval = True
                elif (xmlnode.nodeName == 'nmaprun'
                        and xmlnode.attributes.keys()):
                    self.__starttime = xmlnode.attributes['start'].value
                    self.__version = xmlnode.attributes['version'].value
                    rval = True
                elif (xmlnode.nodeName == 'finished'
                        and xmlnode.attributes.keys()):
                    self.__endtime = xmlnode.attributes['time'].value
                    self.__elapsed = xmlnode.attributes['elapsed'].value
                    self.__summary = xmlnode.attributes['summary'].value
                    rval = True
    except:
        # NOTE(review): bare except silently drops malformed event XML —
        # presumably intentional best-effort parsing; confirm.
        pass
    return rval
def _fetchhead(self):
    """
    Fetches the head information. If there are no variables in the
    <head>, then we also fetch the boolean result.
    """
    self.events = pulldom.parseString(self.__xml)
    for (event, node) in self.events:
        if event == pulldom.START_ELEMENT:
            if node.tagName == 'variable':
                # SELECT result: collect projected variable names.
                self.variables.append(node.attributes['name'].value)
            elif node.tagName == 'boolean':
                # ASK result: expand and read the true/false value.
                self.events.expandNode(node)
                self._hasResult = (node.firstChild.data == 'true')
            elif node.tagName == 'result':
                # Stop before consuming the first result row.
                return
        elif event == pulldom.END_ELEMENT:
            if node.tagName == 'head' and self.variables:
                return
            elif node.tagName == 'sparql':
                return
    # We should not arrive here
def xxecomment(username, request) -> None:
    """
    parse xml unsafely (allowing external entities) and add comment to database
    username - username of PyGoat user
    request - a flask request object
    """
    parser = make_parser()
    # SECURITY: external general entities are enabled on purpose — this
    # is a deliberately XXE-vulnerable lab exercise.
    parser.setFeature(feature_external_ges, True)
    doc = parseString(request.data.decode('utf-8'), parser=parser)
    for event, node in doc:
        if event == START_ELEMENT and node.tagName == 'text':
            doc.expandNode(node)
            text = node.toxml()
            # Slice the inner text out of '<text>...</text>'.
            startInd = text.find('>')
            endInd = text.find('<', startInd)
            text = text[startInd + 1:endInd:]
    conn = sqlite3.connect('pygoat.db')
    c = conn.cursor()
    # Parameterized insert — safe from SQL injection.
    c.execute('''INSERT INTO xxe_comments VALUES (?,?)''', (username, text))
    conn.commit()
    conn.close()
def __call__(self):
    """Aggregate service data into the public Cloud XML document and run
    it through the de-dup, merge and attribute-strip stylesheets."""
    self.readXML = self.aggregateServiceDataToXML()
    doc = parseString(self.readXML)
    finalXML = StringIO()
    finalXML.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
    finalXML.write("<Cloud xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\""+XSD+"\">")
    headNodeXML = StringIO()
    workerNodeXML = StringIO()
    # Here the pulldom API is used to extract the XML nodes under any
    # "HeadNode" tags and write them to the finalXML for XSLT processing
    for event, node in doc:
        if event == xml.dom.pulldom.START_ELEMENT:
            if node.localName == "HeadNode":
                doc.expandNode(node)
                tempString = node.toxml()
                # The fancy string index [10:-11] is used to eliminate the
                # <HeadNode></HeadNode> tags from the output
                headNodeXML.write(tempString[10:-11])
            if node.localName =="Node":
                doc.expandNode(node)
                tempString = node.toxml()
                workerNodeXML.write(tempString)
    finalXML.write("<HeadNode>")
    # This tag is added for the "Optional Cloud Name" of the public XML
    # schema. An 'id' attribute MUST be specified or the XSLs will remove
    # this CloudName tag from the final XML. The 'id' is arbitrary.
    finalXML.write("<CloudName id='arbitrary11235813'>"+ConfigMapping[CLOUD_NAME]+"</CloudName>")
    finalXML.write(headNodeXML.getvalue())
    finalXML.write("</HeadNode>")
    finalXML.write("<WorkerNodes>")
    finalXML.write(workerNodeXML.getvalue())
    finalXML.write("</WorkerNodes>")
    finalXML.write("</Cloud>")
    # The various stylesheets are applied "serially" to the final XML to
    # prepare it for publishing
    return self.applyStyleSheet(ConfigMapping[NAGIOS_LOCATION]+ATTRIBUTE_STRIP_XSL,self.applyStyleSheet(ConfigMapping[NAGIOS_LOCATION]+MERGE_NODES_XSL,self.applyStyleSheet(ConfigMapping[NAGIOS_LOCATION]+REMOVE_DUP_XSL,finalXML.getvalue())))
def add_product():
    """Create a product from an XML payload (request body or an uploaded
    file named 'file').

    Expects a <product> element with <name>, <owner> and <price>
    children. Returns "name;owner;price" for the created product.
    """
    if not request.files:
        xml = request.data
    else:
        file = request.files['file']
        xml = file.read()
    data = parseString(xml)
    prod_details = dict()
    for event, node in data:
        if event == START_ELEMENT and node.tagName == 'product':
            data.expandNode(node)
            for details in node.childNodes:
                # Skip whitespace/text nodes between elements.
                if not getattr(details, 'tagName', None):
                    continue
                prod_details[details.tagName] =\
                    "".join(detail.nodeValue for detail in details.childNodes)
    product = Product(
        name=prod_details['name'],
        owner=prod_details['owner'],
        price=prod_details['price']
    )
    with psycopg2.connect(CONNECTION_DATA) as conn:
        with closing(conn.cursor()) as cur:
            # SECURITY FIX: the original interpolated user-supplied values
            # directly into the SQL string (SQL injection). Use a
            # parameterized query; truncation to 254 chars is preserved.
            cur.execute(
                "INSERT INTO product (name, price, owner) VALUES (%s, %s, %s)",
                (product.name[:254], product.price[:254], product.owner[:254])
            )
        conn.commit()
    return "{NAME};{OWNER};{PRICE}".format(
        NAME=product.name,
        OWNER=product.owner,
        PRICE=product.price
    )
def parseString(self, st):
    """Build a pulldom event stream over *st* and yield every item
    produced by the configured Parsable set (generator)."""
    self.log.debug("Setting up parser...")
    evt_stream = pulldom.parseString(st)
    self.log.debug("{}Parsing started with Parsable set {}".format(
        'Sub-' if self.active else '',
        dict((p, self.__parsables[p].tag()) for p in self.__parsables)))
    for x in self._parse_evt_stream(evt_stream):
        yield x
def loadString(self, xml_string):
    """Parse *xml_string* with pulldom and hand the resulting event
    stream to self.parse."""
    self.parse(pulldom.parseString(xml_string))
def convert_xml_string_into_tokens(xml_string):
    """Parse *xml_string* into a pulldom event stream and convert it to
    tokens via convert_xml_doc_into_tokens."""
    return convert_xml_doc_into_tokens(parseString(xml_string))
def __init__(self, stream_or_string):
    """Wrap *stream_or_string* in a pulldom event stream."""
    stream = pulldom.parseString(stream_or_string)
    self.event_stream = stream
from xml.dom.pulldom import CHARACTERS, START_ELEMENT, parseString, END_ELEMENT # Use djb development version of collatex (https://github.com/djbpitt/collatex, "experimental" branch) sys.path.append('/Users/djb/collatex/collatex-pythonport/') from collatex import * class Stack(list): def push(self, item): self.append(item) def peek(self): return self[-1] # Initialize input and output source = open('pizarnik.xml','r').read() doc = parseString(source) witnesses = {} # Only process content inside witnesses inWitness = False inLine = False # Tokenize, keeping leading whitespace (whitespace after last token is processed separately) def tokenize(contents): return re.findall(r'\s*\S+', contents) # Regex startWhite = re.compile(r'\s+') # strip leading whitespace; match() is automatically anchored at the start endWhite = re.compile(r'\S\s+$') # test for trailing whitespace to include in output for event, node in doc:
def __init__(self, xml):
    """Create a pull-DOM event iterator over the supplied XML."""
    stream = pulldom.parseString(xml)
    self._events = stream
def __process_event(self, eventdata):
    """
    Private method called while the nmap process is running. It enables the
    library to handle specific data/events produced by the nmap process.

    So far, the following events are supported:

    1. task progress: updates estimated time to completion and percentage
       done while scan is running. Could be used in combination with a
       callback function which could then handle this data while scan is
       running.
    2. nmap run: header of the scan. Usually displayed when nmap is started.
    3. finished: when nmap scan ends.

    :return: True if the event is known.
    :todo: handle parsing directly via NmapParser.parse()
    """
    rval = False
    try:
        edomdoc = pulldom.parseString(eventdata)
        for xlmnt, xmlnode in edomdoc:
            if xlmnt is not None and xlmnt == pulldom.START_ELEMENT:
                if (xmlnode.nodeName == 'taskbegin'
                        and xmlnode.attributes.keys()):
                    # A new nmap task starts: record it and make it current.
                    xt = xmlnode.attributes
                    taskname = xt['task'].value
                    starttime = xt['time'].value
                    xinfo = ''
                    if 'extrainfo' in xt.keys():
                        xinfo = xt['extrainfo'].value
                    newtask = NmapTask(taskname, starttime, xinfo)
                    self.__nmap_tasks[newtask.name] = newtask
                    self.__current_task = newtask.name
                    rval = True
                elif (xmlnode.nodeName == 'taskend'
                        and xmlnode.attributes.keys()):
                    # Task finished: stamp end time / extra info and close it.
                    xt = xmlnode.attributes
                    tname = xt['task'].value
                    xinfo = ''
                    self.__nmap_tasks[tname].endtime = xt['time'].value
                    if 'extrainfo' in xt.keys():
                        xinfo = xt['extrainfo'].value
                    self.__nmap_tasks[tname].extrainfo = xinfo
                    self.__nmap_tasks[tname].status = "ended"
                    rval = True
                elif (xmlnode.nodeName == 'taskprogress'
                        and xmlnode.attributes.keys()):
                    # Progress update for a running task.
                    xt = xmlnode.attributes
                    tname = xt['task'].value
                    percent = xt['percent'].value
                    etc = xt['etc'].value
                    remaining = xt['remaining'].value
                    updated = xt['time'].value
                    self.__nmap_tasks[tname].percent = percent
                    self.__nmap_tasks[tname].progress = percent
                    self.__nmap_tasks[tname].etc = etc
                    self.__nmap_tasks[tname].remaining = remaining
                    self.__nmap_tasks[tname].updated = updated
                    rval = True
                elif (xmlnode.nodeName == 'nmaprun'
                        and xmlnode.attributes.keys()):
                    # Scan header: capture start time and nmap version.
                    self.__starttime = xmlnode.attributes['start'].value
                    self.__version = xmlnode.attributes['version'].value
                    rval = True
                elif (xmlnode.nodeName == 'finished'
                        and xmlnode.attributes.keys()):
                    # Scan footer: capture end time, elapsed time, summary.
                    self.__endtime = xmlnode.attributes['time'].value
                    self.__elapsed = xmlnode.attributes['elapsed'].value
                    self.__summary = xmlnode.attributes['summary'].value
                    rval = True
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Malformed/partial XML chunks are expected while
        # nmap is still streaming output, so this stays best-effort and
        # simply reports the event as unhandled.
        pass
    return rval
def test_external_ges_default(self):
    """External general entity loading must be off by default."""
    events = pulldom.parseString(SMALL_SAMPLE)
    underlying = events.parser
    self.assertEqual(underlying.getFeature(feature_external_ges), False)
def test_getitem_deprecation(self):
    """Indexing a pull-DOM stream is deprecated in favor of iteration."""
    events = pulldom.parseString(SMALL_SAMPLE)
    with self.assertWarnsRegex(DeprecationWarning,
                               r'Use iterator protocol instead'):
        # This should have returned 'END_ELEMENT'.
        first = events[-1]
        self.assertEqual(first[0], pulldom.START_DOCUMENT)
def buscarcep(cep):
    """
    Look up the given CEP (Brazilian postal code) using the service provided
    by www.buscarcep.com.br, returning a one-element list with a dict of the
    information obtained. The keys returned are: 'cep', 'uf', 'cidade',
    'bairro', 'tipo_logradouro' and 'logradouro'. To evaluate the outcome of
    the lookup, check the 'resultado' and 'resultado_txt' keys. For further
    details see the service site at www.buscarcep.com.br.
    """
    url = urllib.urlopen(
        "http://www.buscarcep.com.br/?cep=" + cep + "&formato=xml")
    cepinfo = {
        "cep": "",
        "uf": "",
        "cidade": "",
        "bairro": "",
        "tipo_logradouro": "",
        "logradouro": "",
        "resultado": 0,
        "resultado_txt": "",
    }
    if url:
        texto = url.read()
        url.close()
        events = pulldom.parseString(texto)
        # All fields of interest live under this path; the element name under
        # it matches the cepinfo key, so a lookup replaces the old 9-branch
        # elif chain over full XPath strings.
        prefix = "/webservicecep/retorno/"
        xpath = ""
        for event, node in events:
            if event == pulldom.START_ELEMENT:
                xpath += "/" + node.nodeName
            elif event == pulldom.END_ELEMENT:
                pos = xpath.rfind("/")
                xpath = xpath[0:pos]
            elif event == pulldom.CHARACTERS:
                if not xpath.startswith(prefix):
                    continue
                key = xpath[len(prefix):]
                if key == "resultado":
                    # Numeric status code.
                    cepinfo["resultado"] = int(node.nodeValue)
                elif key in cepinfo:
                    cepinfo[key] = node.nodeValue
    else:
        # connection error
        cepinfo["resultado"] = 0
        cepinfo["resultado_txt"] = "Erro na conexão"
    return [cepinfo]