def compose_book(self, r):
    """Build a ``Book`` from the record ``r``.

    ``r`` is indexed by key (``r['bid']``, ``r['title']``, ...); every key
    listed below must be present.  Each value is copied onto the matching
    attribute of a fresh ``Book``; the only renamed field is ``bid``, which
    is stored as ``book.id``.  After all fields are copied, ``transtr`` is
    filled in with the result of the book's own ``_transtr()`` call.

    Returns the populated ``Book``.
    """
    # (Book attribute, key in r) pairs, listed in the order they are copied.
    field_map = (
        ("id", "bid"),
        ("isbn10", "isbn10"),
        ("isbn13", "isbn13"),
        ("title", "title"),
        ("subtitle", "subtitle"),
        ("author", "author"),
        ("translators", "translators"),
        ("publisher", "publisher"),
        ("pubdate", "pubdate"),
        ("price", "price"),
        ("pages", "pages"),
        ("update_time", "update_time"),
        ("create_time", "create_time"),
        ("quantity", "quantity"),
        ("series", "series"),
        ("keywords", "keywords"),
        ("summary", "summary"),
        ("status", "status"),
    )

    result = Book()
    for attr_name, key in field_map:
        setattr(result, attr_name, r[key])

    # Derived value: computed only once every plain field is in place.
    result.transtr = result._transtr()
    return result
class SimpleHandler(sax.ContentHandler):
    """Simple SAX handler that parses the book XML from douban into a Book.

    Text is collected between a start tag and its end tag.  When a
    ``<db:attribute name="...">`` element ends, its text is stored on the
    book according to the ``name`` attribute; when a ``<summary>`` element
    ends, its text becomes ``book.summary``.  Call ``getresult()`` after
    parsing to obtain the populated book.
    """

    # ``name`` of a <db:attribute> element -> Book attribute that receives
    # the element text unchanged.
    _TEXT_FIELDS = {
        "isbn10": "isbn10",
        "isbn13": "isbn13",
        "title": "title",
        "subtitle": "subtitle",
        "author": "author",
        "publisher": "publisher",
        "pubdate": "pubdate",
        "format": "format",
        "binding": "binding",
        "series": "series",
        "keywords": "keywords",
        "author-intro": "authorintro",
        "quantity": "quantity",
    }

    def __init__(self):
        # Explicit base call (works for old- and new-style classes alike) so
        # the handler state owned by ContentHandler is initialised too.
        sax.ContentHandler.__init__(self)
        self.book = None
        self.buffer = []
        self.itemName = ""
        # Leading integer, e.g. "300" out of "300 pages".
        self.pPages = re.compile(r"([0-9]+)\D*")
        # Leading number with at most two decimals.  Accepts "25", "25.",
        # "25.00" and ".5"; a bare "." no longer matches, so it cannot reach
        # float() and raise ValueError.
        self.pPrice = re.compile(
            r"([0-9]+(?:\.[0-9]{0,2})?|\.[0-9]{1,2})(\D*)")

    def startDocument(self):
        """Start a fresh Book for the document being parsed."""
        self.book = Book()

    def startElement(self, name, attrs):
        """Reset the text buffer; remember the name of a db:attribute."""
        self.buffer = []
        if name == "db:attribute":
            self.itemName = attrs.getValue('name')

    def endElement(self, name):
        """Store the text collected for the element that just ended."""
        if name == "db:attribute":
            self._store_attribute(self.itemName, self._text())
        elif name == 'summary':
            self.book.summary = self._text()

    def characters(self, content):
        """Buffer one chunk of character data.

        A parser may deliver a single text node in several chunks, so the
        chunk is kept verbatim; stripping happens once, on the joined text,
        to avoid gluing words together at chunk boundaries.
        """
        self.buffer.append(content)

    def getresult(self):
        """Return the parsed book after refreshing its ``transtr`` value."""
        self.book.transtr = self.book._transtr()
        return self.book

    def _text(self):
        """Return the buffered text with surrounding whitespace removed."""
        return "".join(self.buffer).strip()

    def _store_attribute(self, item, text):
        """Store ``text`` on the book for the db:attribute named ``item``."""
        book = self.book
        if item in self._TEXT_FIELDS:
            setattr(book, self._TEXT_FIELDS[item], text)
        elif item == "translator":
            # There may be multiple translators; collect all of them.
            book.translators.append(text)
        elif item == "pages":
            matcher = self.pPages.match(text)
            book.pages = int(matcher.group(1)) if matcher else 0
        elif item == "price":
            matcher = self.pPrice.match(text)
            book.price = float(matcher.group(1)) if matcher else 0.0
        # Any other attribute name is ignored.