("http://tathata.lofter.com/rss", "lofter", 5), ("http://janicezz.lofter.com/rss", "lofter", 5), ("http://nbyazi.lofter.com/rss", "lofter", 5), ("http://fandi.lofter.com/rss", "lofter", 5), ("http://waufs.lofter.com/rss", "lofter", 5), ("http://jacksonmind.lofter.com/rss", "lofter", 5), ("http://xiaoye.lofter.com/rss", "lofter", 5), ("http://patata.lofter.com/rss", "lofter", 5), ("http://tinoleung.lofter.com/rss", "lofter", 5) ] for feed in rss_feeds: feed_url = feed[0] provider = feed[1] photo_type = feed[2] s = PhotoSpider(provider=provider) s.parser.assign('photo_type', photo_type) s.setUserAgent() print feed_url for i in range(5): r = feedparser.parse(feed_url) if (r['entries']): break else: #if(r.has_key('bozo_exception')): #print r['bozo_exception'].getMessage() print 'rss feed parse error, retry...' sleep(2) for target in r['entries']: s.append(url=target['link']) print target['link']
# NOTE(review): the ''' delimiters below switch whole sections on or off.
# Which sections are live depends on delimiters outside this excerpt, so the
# statement order and delimiter positions are kept exactly as they were.

# Lofter: queue every entry link from each feed in rss_list, then crawl.
s = PhotoSpider(provider='lofter')
s.setUserAgent()
for rss in rss_list:
    r = feedparser.parse(rss)
    for target in r['entries']:
        #print target['link']
        s.append(url=target['link'])
s.proceed(1000)
'''

# Tuita archive
#queryurl = 'http://imovie.tuita.com/archive'  # 6 - movies
queryurl = 'http://jxh1964.tuita.com/archive'  # 6 - movies
#queryurl = 'http://yamijazz.tuita.com/archive'  # 5 - photography
#queryurl = 'http://longmaotx.tuita.com/archive'  # 5 - photography
s = PhotoSpider(provider='tuita_archive')
s.parser.assign('photo_type', 6)
s.setUserAgent()
s.append(url=queryurl)
s.proceed(1000)
'''

# Diandian archive - discovered endpoint
#queryurl = 'http://ump-cn.diandian.com/archive?lite=1&month=201203'
#queryurl = 'http://allposter.diandian.com/archive?lite=1&month=201201'
queryurl = 'http://movielife.diandian.com/archive?lite=1&month=201102'
s = PhotoSpider(provider='diandian_archive')
s.parser.assign('photo_type', 6)  # movie
s.setUserAgent()
s.append(url=queryurl)
s.proceed(1000)
#coding: utf8
# NOTE(review): Spider is not referenced anywhere in the visible code.
from wedspider.spider import Spider
from photo.photo_spider import PhotoSpider

# Daily update
if __name__ == "__main__":
    # Huaban "beauty" favorites
    import uuid

    # Eight hex characters from a fresh UUID, placed directly after the '?'
    # (presumably a cache-busting token - TODO confirm).
    token = uuid.uuid1().hex[:8]
    # since=xxx selects the item to start crawling from.
    queryurl = ''.join([
        'http://huaban.com/favorite/beauty/?',
        token,
        '&limit=100&since=3860744',
    ])

    s = PhotoSpider(provider='huaban')
    s.setUserAgent()
    s.append(url=queryurl)
    s.proceed(9999)