def createXML():
    """Write the element tree to ``Output.xml`` as pretty-printed XML.

    Reads ``elements``, ``idOfRootElement`` and ``attributes`` from the
    enclosing scope (they are not parameters of this function), asks the
    root element to build its XML node, and writes the prettified result
    preceded by an XML declaration.
    """
    # Build the whole document before touching the output file, so a failure
    # while generating the tree does not leave a truncated Output.xml behind.
    tree = elements[idOfRootElement].createXMLNode(elements, attributes)
    soup = BeautifulStoneSoup(tree)

    # The context manager closes the file even if a write raises.
    with open('Output.xml', 'w') as XMLFile:
        XMLFile.write('<?xml version="1.0"?>')
        XMLFile.write(soup.prettify())
def parseXML(xml, dbHandler):
    """Parse *xml* and insert every extracted row through *dbHandler*.

    Args:
        xml: XML markup handed to ``BeautifulStoneSoup``.
        dbHandler: object providing ``getColumnNames()`` and
            ``insertRow(row)``.
    """
    elements = dbHandler.getColumnNames()
    soup = BeautifulStoneSoup(xml)

    # find() with no arguments is used to obtain the root node
    # (presumably the first tag of the document - confirm).
    root = soup.find()

    # The first entry returned by convertToListOfRows is skipped
    # (presumably a header/root entry - confirm against that helper).
    listOfRows = convertToListOfRows(extractRows(elements, root))[1:]

    # all data is held in main memory before insertion into the db
    for row in listOfRows:
        dbHandler.insertRow(row)
import re

from extra_lib.BeautifulSoup import BeautifulStoneSoup
from qsx import run

# Example driver: load ./example/example.xml, pick the parent of every
# <course> element as the simulated query output, and pass it to qsx.run
# together with the DTD path and the tag mappings.

# User specifies the following mappings, DTD2 -> DTD1.
mappings = {
    'result': 'example',
    'cname': 'coursename',
    'student': 'course',
    'course': 'student',
    'name': 'name',
    'mark': 'grade',
}

# Let's say the query returns this:
# Lines are stripped and concatenated so the parser sees no
# indentation/newline whitespace between tags. The context manager closes
# the file even if reading fails.
with open('./example/example.xml', 'r') as f:
    xml = ''.join([line.strip() for line in f])
soup = BeautifulStoneSoup(xml)

allStudents = soup.findAll('course')
# NOTE: a parent that contains several <course> elements is added once per
# course, so queryOutput holds duplicate students; the tree will need to be
# pruned.
queryOutput = [match.parent for match in allStudents]

run('./example/example.dtd', mappings, queryOutput)
import re

from extra_lib.BeautifulSoup import BeautifulStoneSoup, Tag

# Hand-written translation of ./example/example.xml into a
# <result><course>...</course></result> document.

# Lines are stripped and concatenated so the parser sees no
# indentation/newline whitespace between tags. The context manager closes
# the file even if reading fails.
with open("./example/example.xml", "r") as f:
    xml = "".join([line.strip() for line in f])
soup = BeautifulStoneSoup(xml)

# Begin XML Translation Grammar
#   result  -> (course*)
#   courses -> (cname, student*)
#   student -> (name, mark)
#
# Earlier sketches of the same rules, kept for reference:
#   result = BeautifulStoneSoup('<result></result>')
#   courses = [ (c.coursename, c) for c in soup.findAll('course') ]
#   students = [ (s.find('name'), s.grade, s)
#                for s in [ c.findPrevious('student') for (name, c) in courses] ]

# Collect the fragments in a list and join once instead of repeated "+=".
parts = ["<result>"]
for c in soup.findAll("course"):
    parts.append("<course>")
    # The student a course belongs to is the nearest preceding <student> tag.
    s = c.findPrevious("student")
    parts.append(str(c.coursename))
    # find("name") is required here: s.name is the tag's own name in
    # BeautifulSoup, not its <name> child.
    parts.append(str(s.find("name")))
    parts.append(str(s.grade))
    parts.append("</course>")
parts.append("</result>")
res = "".join(parts)

result = BeautifulStoneSoup(res)
# The prettified text is computed but not printed or stored; print the
# return value of result.prettify() to inspect the translation.
result.prettify()
import re
import time

from extra_lib.BeautifulSoup import BeautifulStoneSoup
from qsx import run

# Example driver: load ./example/example11.xml, select the nodes two levels
# above every text match of "Awesome headline" as the simulated query output,
# pass them to qsx.run, and report the elapsed wall-clock time.

start = time.time()

# User specifies the following mappings, DTD2 -> DTD1.
mappings = {
    'articles': 'newspaper',
    'article': 'article',
    'headline': 'headline',
    'byline': 'byline',
    'lead': 'lead',
    'body': 'body',
    'notes': 'notes',
}

# Let's say the query returns this:
# Lines are stripped and concatenated so the parser sees no
# indentation/newline whitespace between tags. The context manager closes
# the file even if reading fails.
with open('./example/example11.xml', 'r') as f:
    xml = ''.join([line.strip() for line in f])
soup = BeautifulStoneSoup(xml)

# NOTE(review): in BeautifulSoup 3, passing text= appears to make findAll
# return the matching text nodes and ignore the tag name; that would explain
# why two .parent hops are needed below (text -> headline -> enclosing
# element). Confirm against the library version in extra_lib.
allHeadlines = soup.findAll('HEADLINE', text=re.compile("Awesome headline"))
queryOutput = [match.parent.parent for match in allHeadlines]

run('./example/example11.dtd', mappings, queryOutput)

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("Runtime: " + str(time.time() - start) + " seconds.")