Example #1 (score: 0)
		("http://tathata.lofter.com/rss", "lofter", 5),
		("http://janicezz.lofter.com/rss", "lofter", 5),
		("http://nbyazi.lofter.com/rss", "lofter", 5),
		("http://fandi.lofter.com/rss", "lofter", 5),
		("http://waufs.lofter.com/rss", "lofter", 5),
		("http://jacksonmind.lofter.com/rss", "lofter", 5),
		("http://xiaoye.lofter.com/rss", "lofter", 5),
		("http://patata.lofter.com/rss", "lofter", 5),
		("http://tinoleung.lofter.com/rss", "lofter", 5)		
	]

	for feed in rss_feeds:
		feed_url = feed[0]
		provider = feed[1]
		photo_type = feed[2]
		s = PhotoSpider(provider = provider)
		s.parser.assign('photo_type', photo_type)
		s.setUserAgent()
		print feed_url
		for i in range(5):
			r = feedparser.parse(feed_url)
			if(r['entries']):
				break
			else:
				#if(r.has_key('bozo_exception')):
					#print r['bozo_exception'].getMessage()
				print 'rss feed parse error, retry...'
			sleep(2)
		for target in r['entries']:
			s.append(url = target['link'])
			print target['link']
Example #2 (score: 0)
#coding: utf8

from wedspider.spider import Spider
from photo.photo_spider import PhotoSpider

# Daily-update crawl
if __name__ == "__main__":
	# Crawl the QQ News (QQ 新闻) photo page.
	spider = PhotoSpider(provider='news.qq')
	spider.setUserAgent()
	spider.append(url='http://news.qq.com/photo.shtml')
	spider.proceed(1000)
Example #3 (score: 0)
        ("http://tathata.lofter.com/rss", "lofter", 5),
        ("http://janicezz.lofter.com/rss", "lofter", 5),
        ("http://nbyazi.lofter.com/rss", "lofter", 5),
        ("http://fandi.lofter.com/rss", "lofter", 5),
        ("http://waufs.lofter.com/rss", "lofter", 5),
        ("http://jacksonmind.lofter.com/rss", "lofter", 5),
        ("http://xiaoye.lofter.com/rss", "lofter", 5),
        ("http://patata.lofter.com/rss", "lofter", 5),
        ("http://tinoleung.lofter.com/rss", "lofter", 5)
    ]

    for feed in rss_feeds:
        feed_url = feed[0]
        provider = feed[1]
        photo_type = feed[2]
        s = PhotoSpider(provider=provider)
        s.parser.assign('photo_type', photo_type)
        s.setUserAgent()
        print feed_url
        for i in range(5):
            r = feedparser.parse(feed_url)
            if (r['entries']):
                break
            else:
                #if(r.has_key('bozo_exception')):
                #print r['bozo_exception'].getMessage()
                print 'rss feed parse error, retry...'
            sleep(2)
        for target in r['entries']:
            s.append(url=target['link'])
            print target['link']
Example #4 (score: 0)
	s = PhotoSpider(provider = 'lofter')
	s.setUserAgent()
	for rss in rss_list:
		r = feedparser.parse(rss)
		for target in r['entries']:
			#print target['link']
			s.append(url = target['link'])
	s.proceed(1000)
	'''

	# Tuita archive crawl — pick one archive URL below; the trailing
	# "#N - ..." annotations give the matching photo_type code for that blog.
	#queryurl = 'http://imovie.tuita.com/archive'  #6 - movies
	queryurl = 'http://jxh1964.tuita.com/archive' #6 - movies
	#queryurl = 'http://yamijazz.tuita.com/archive' #5 - photography
	#queryurl = 'http://longmaotx.tuita.com/archive' #5 - photography
	s = PhotoSpider(provider = 'tuita_archive')
	# photo_type 6 == movies; must agree with the archive URL chosen above.
	s.parser.assign('photo_type', 6)
	s.setUserAgent()
	s.append(url = queryurl)
	# NOTE(review): 1000 is presumably a crawl budget (max pages/requests)
	# passed to Spider.proceed — confirm in the Spider base class.
	s.proceed(1000)

	'''
	#点点存档 - 发现的接口
	#queryurl = 'http://ump-cn.diandian.com/archive?lite=1&month=201203'
	#queryurl = 'http://allposter.diandian.com/archive?lite=1&month=201201'
	queryurl = 'http://movielife.diandian.com/archive?lite=1&month=201102'
	s = PhotoSpider(provider = 'diandian_archive')
	s.parser.assign('photo_type', 6) #movie
	s.setUserAgent()
	s.append(url = queryurl)
	s.proceed(1000)
Example #5 (score: 0)
#coding: utf8

from wedspider.spider import Spider
from photo.photo_spider import PhotoSpider

# Daily-update crawl
if __name__ == "__main__":
	# Huaban "beauty" favorites (花瓣美女). A random uuid hex fragment is
	# inserted into the query string — presumably a cache buster; since=<id>
	# sets which item the fetch starts from.
	import uuid
	random_fragment = uuid.uuid1().hex[:8]
	start_url = ('http://huaban.com/favorite/beauty/?' + random_fragment
		+ '&limit=100&since=3860744')
	spider = PhotoSpider(provider='huaban')
	spider.setUserAgent()
	spider.append(url=start_url)
	spider.proceed(9999)