def saveHtml(self, siteID, _title, stdUrl, _source):
    """Store a page's metadata and its parsed source, returning the record id.

    Two records are written, in this order:

    1. an ``HtmlInfo`` row holding the site id, title, URL and today's date
       (``filetitle`` is stored as a single space);
    2. an ``HtmlSource`` row holding the output of
       ``self.htmlparser.transXML(stdUrl)``, linked to the ``HtmlInfo`` row.

    Args:
        siteID: Identifier of the site the page belongs to.
        _title: Page title saved on the ``HtmlInfo`` record.
        stdUrl: Page URL; saved on the record and passed to ``transXML``.
        _source: Raw page source, handed to ``self.htmlparser.transcode``.

    Returns:
        The ``id`` attribute of the saved ``HtmlInfo`` record.
    """
    # The transcoded text is not read again in this method; the call is kept
    # because the parser may rely on it having been made.
    # NOTE(review): confirm whether transcode() has side effects on the parser.
    self.htmlparser.transcode(_source)
    saved_on = date.today()

    # Store the page information.
    print('.. save htmlinfo')
    page_info = HtmlInfo(
        siteID=siteID,
        title=_title,
        url=stdUrl,
        date=saved_on,
        filetitle=" ",
    )
    page_info.save()

    # Store the parsed form of the page, attached to the info record above.
    parsed_xml = self.htmlparser.transXML(stdUrl)
    page_source = HtmlSource(parsed_source=parsed_xml, info=page_info)
    page_source.save()

    return page_info.id
def reverseRecord(self): """ 排序后 将新记录返回 重新传输到 reptile 中 should clear all records in old database """ print ".. reverseRecord" htmlinfos = models.HtmlInfo.objects.all() for htmlinfo in htmlinfos: _htmlinfo = HtmlInfo(siteID=htmlinfo.siteID, title=htmlinfo.title, url=htmlinfo.url, date=htmlinfo.date) _htmlinfo.save() """