# Manual check script: fetch a single article page (act=article, board=Picture)
# from newsmth, run it through BeautyArticleProcessor and dump selected fields.
from smthparser import BeautyArticleProcessor
from smthapi import SimpleSMTH
from url_pic_finder import *

smth = SimpleSMTH()
out = smth.get_url_data(
    "http://www.newsmth.net/atomic.php?act=article&board=Picture&id=569616"
)

# The response is decoded as GBK; bytes that cannot be decoded are dropped.
parser = BeautyArticleProcessor()
parser.feed(unicode(out, "gbk", "ignore"))
result = parser.getall()

# Print the "c", "ref" and "sign" entries, each preceded by a separator line,
# then one closing separator.
for section in ("c", "ref", "sign"):
    print("===================")
    print(result[section])
print("===================")

# result["pic"] is appended to the bbscon.php address and handed to find_url.
urls = "http://www.newsmth.net/bbscon.php" + result["pic"]
print(find_url(urls))
tries=0 def login(smth): global tries r=smth.smthLogin(userid,userpass) if r==0: key=context.gen_sid() smth.saveCj(key) else: print "login error" tries=tries+1 if tries < 3: login(smth) smth = SimpleSMTH() #sumaryFeeds=[] feed=Feed() login(smth) #get top10 topics out=smth.get_url_data("http://www.newsmth.net/rssi.php?h=1") top10parser=Top10Parser(out) articleparser=BeautyArticleProcessor() #read data from disk try: f=open(archive+"/sm.data","r") sumaryFeeds=cPickle.load(f) print "SumaryFeeds exist" f.close()
def down_image(smth, url, archive="."):
    """Download the image at *url* and store a half-size copy in the archive.

    The local file name is the MD5 hex digest of the URL plus ``.jpg`` and it
    lives in ``<archive>/images/``. If that file already exists nothing is
    downloaded.

    Args:
        smth: object providing ``get_url_data(url)`` that returns the raw
            image bytes.
        url: address of the image; byte strings and unicode strings are both
            accepted.
        archive: base directory that holds the ``images`` folder. Defaults to
            the current directory.

    Returns:
        The path ``"images/<md5>.jpg"`` relative to *archive*. It is returned
        even when decoding or saving the image failed (best effort); the
        failure is printed instead of being silently discarded.
    """
    print("download image from :%s" % url)

    # md5 needs bytes. Encoding explicitly keeps ASCII URLs hashing to the
    # same name as before and stops non-ASCII unicode URLs from raising.
    url_bytes = url if isinstance(url, bytes) else url.encode("utf-8")
    filename = hashlib.md5(url_bytes).hexdigest() + ".jpg"
    relative_path = "images/" + filename
    localimage = "%s/images/%s" % (archive, filename)

    if os.path.isfile(localimage):
        print(localimage + "existed")
        return relative_path

    image_dir = archive + "/images/"
    if not os.path.isdir(image_dir):
        # makedirs also creates *archive* itself when it is missing.
        os.makedirs(image_dir)

    imageData = smth.get_url_data(url)
    try:
        im = Image.open(StringIO(imageData))
        width, height = im.size
        # Floor division, clamped so tiny images never get a zero dimension.
        half_size = (max(1, width // 2), max(1, height // 2))
        im.resize(half_size, Image.ANTIALIAS).save(localimage)
    except Exception as err:
        # Best effort: one broken image must not abort the caller, but the
        # failure should be visible.
        print("failed to save image %s: %s" % (url, err))
    return relative_path


if __name__ == "__main__":
    url = "http://www.newsmth.net/bbscon.php?bid=382&id=570506"
    smth = SimpleSMTH()
    out = smth.get_url_data(url)
    for imageurl in find_url(unicode(out, "gbk")):
        # Uses the default archive directory (current directory).
        print(down_image(smth, imageurl))
# Manual check script: download one article page (act=article, board=Picture),
# parse it with BeautyArticleProcessor and print a few of the parsed fields.
from smthparser import BeautyArticleProcessor
from smthapi import SimpleSMTH
from url_pic_finder import *

smth = SimpleSMTH()
out = smth.get_url_data("http://www.newsmth.net/atomic.php?act=article&board=Picture&id=569616")

parser = BeautyArticleProcessor()
# Decode the raw response as GBK, ignoring bytes that cannot be decoded.
parser.feed(unicode(out, "gbk", "ignore"))
result = parser.getall()

# Opening separator, then each field followed by its own separator line.
print("===================")
for field in ('c', 'ref', 'sign'):
    print(result[field])
    print("===================")

# Build the bbscon.php address from result['pic'] and pass it to find_url.
urls = "http://www.newsmth.net/bbscon.php" + result['pic']
print(find_url(urls))
def login(smth): global tries r = smth.smthLogin(userid, userpass) if r == 0: key = context.gen_sid() smth.saveCj(key) else: print "login error" tries = tries + 1 if tries < 3: login(smth) smth = SimpleSMTH() #sumaryFeeds=[] feed = Feed() login(smth) #get top10 topics out = smth.get_url_data("http://www.newsmth.net/rssi.php?h=1") top10parser = Top10Parser(out) articleparser = BeautyArticleProcessor() #read data from disk try: f = open(archive + "/sm.data", "r") sumaryFeeds = cPickle.load(f) print "SumaryFeeds exist" f.close() except IOError: