def skin(self, html):
    """Parse *html* with ``JokePage`` and save every joke it yields.

    Each entry of the parser's ``l`` list is read through ``.get`` using
    the keys ``jid``, ``jokecontent`` and ``img``.  The entity is stored
    under the key name ``'j' + jid``; a newly created entity takes its
    ``date`` from the ``DefaultDate`` entity stored under key name
    ``'date'``.  ``type`` is set to 2 when an image is present and to 3
    otherwise, and ``/small/`` in the image URL is replaced by ``/big/``.

    Failures are handled per joke: the exception is logged with the
    ``111:`` prefix and the loop continues with the next entry.

    NOTE(review): this method reached review with its indentation
    stripped.  The nesting below assumes a get-or-create flow (only the
    entity creation and the ``date`` assignment sit under
    ``if not joke``, and the ``try`` wraps a single joke) -- confirm
    against the original file.

    :param html: page markup; it is passed through ``str()`` before being
        fed to the parser.
    """
    default_date = DefaultDate.get_by_key_name('date')

    parser = JokePage()
    parser.clearHtml()
    parser.feed(str(html))

    for item in parser.l:
        try:
            # Build the datastore key once; a missing ``jid`` raises here
            # and is reported by the handler below like any other failure.
            key_name = 'j' + item.get('jid')

            joke = NewJoke.get_by_key_name(key_name)
            if not joke:
                joke = NewJoke(key_name=key_name)
                joke.date = default_date.date

            joke.joke = item.get('jokecontent', '').strip().decode('utf-8')

            img = item.get('img', '')
            if img:
                joke.img = img.replace('/small/', '/big/')
                joke.type = 2
            else:
                joke.type = 3

            joke.put()
        except Exception as e:
            # ``str(e)`` can itself raise on Python 2 when the exception
            # text is non-ASCII unicode, which would escape this handler
            # and stop the whole batch.  ``%r`` is always safe, and
            # ``logging.exception`` also records the traceback.
            logging.exception('111:%r', e)
def skin0(self, html):
    """Extract jokes from *html* with regular expressions and store them.

    Two passes are made over the markup: one collects ``(id, text)`` pairs
    from ``<p class="block joke-item" id="joke-N">`` elements, the other
    collects ``(id, src)`` pairs from ``<a id="thumbnail-N">`` anchors that
    contain an ``<img>``.  Image URLs have ``/small/`` replaced by
    ``/big/``.

    Ids already present in ``self.jokeset`` are skipped; new ids are added
    to it before processing.  The stored joke text is the first inner
    ``<p ...>...</p>`` found after ``html_parser.unescape`` has been
    applied to the matched text.  Entries without such a paragraph are
    logged and skipped instead of aborting the rest of the page.  ``type``
    is 2 when an image exists for the id and 3 otherwise.

    NOTE(review): this method reached review with its indentation
    stripped.  The nesting below assumes a get-or-create flow (only the
    entity creation and the ``date`` assignment sit under
    ``if not joke``) and a single ``return`` after the loop -- confirm
    against the original file.

    :param html: page markup to scan.
    :returns: ``(haha, imgmap)`` -- the list of ``(id, text)`` tuples
        matched in the page and the dict mapping id to image URL.
    """
    # Sample markup and patterns kept from the original source; they match
    # each other and look like an earlier page layout (TODO confirm):
    #   <div class='list-text' id='listText-242377'>
    #   <a href='###' class='list-pic' mark='242377' id='list-pic-242377'
    #      path='2012/01/18/'
    #      pic_name='242377_cc5fb6ff525c05fb833d0d973f344da5_1326872875.jpg'>
    #   <img src='http://image.haha.mx/2012/01/18/small/242377_cc5fb6ff525c05fb833d0d973f344da5_1326872875.jpg'
    #      onerror='this.onerror=null;this.src="http://static.haha.mx/images/img-error.jpg"'/>
    #   </a>
    # haha=re.findall('(?i)<div class=\'list-text\' id=\'listText-(\d+)\'[^>]*>(.*?)</div>',html)
    # hahaimg=re.findall('(?i)<a [^>]*mark=\'(\d+)\'[^>]*>[^<]*?<img src=\'(.*?)\'[^>]*>[^<]*</a>',html)
    default_date = DefaultDate.get_by_key_name('date')

    haha = re.findall(
        r'(?i)<p class="block joke-item" id="joke-(\d+)"[^>]*>(.*?)</p>',
        html)
    hahaimg = re.findall(
        r'(?i)<a [^>]*id="thumbnail-(\d+)"[^>]*>[^<]*?<img src="(.*?)"[^>]*>',
        html)

    imgmap = dict(
        (img_id, src.replace('/small/', '/big/')) for img_id, src in hahaimg)

    for idn, txt in haha:
        if idn in self.jokeset:
            continue
        self.jokeset.add(idn)

        # Check the body before touching the datastore: the original
        # indexed ``[0]`` unconditionally, so one entry without an inner
        # paragraph raised IndexError and dropped every later entry.
        paragraphs = re.findall(
            '(?i)<p [^>]*>(.*?)</p>', html_parser.unescape(txt))
        if not paragraphs:
            logging.error('skin0: no <p> body found for joke %s', idn)
            continue

        key_name = 'j' + idn
        joke = NewJoke.get_by_key_name(key_name)
        if not joke:
            joke = NewJoke(key_name=key_name)
            joke.date = default_date.date

        joke.joke = paragraphs[0]

        if idn in imgmap:
            joke.img = imgmap[idn]
            joke.type = 2
        else:
            joke.type = 3

        joke.put()

    return haha, imgmap