class WeiboSpider(scrapy.Spider):
    """Spider named ``weibo`` that crawls weibo.cn with stored login cookies.

    The crawl is seeded by :meth:`start_requests` with a single user page.
    ``read_cookie``, ``rand_cookie`` and ``parse_user_new`` are used by this
    class but are not defined in this section of the file.
    """

    name = "weibo"
    # allowed_domains = ["weibo.cn"]  # domain restriction is left disabled

    # Redis client object built once, when the class body is executed, and
    # therefore shared by every instance of the spider.
    conn_r = redis.Redis(host="localhost", port=6379, db="1")

    # NOTE: these two containers are class attributes that are mutated through
    # ``self``; every instance of the spider sees (and changes) the same objects.
    login_cookies = {}  # cookie file name -> value returned by read_cookie()
    cklist = []  # cookie file names, in the order they were loaded

    def start_requests(self):
        """Log in, load the login cookies and yield the seed request.

        A ``Fetcher`` is created and asked to log in; every cookie file it
        returns is read with ``self.read_cookie`` and stored in
        ``self.login_cookies`` (keyed by file name) and ``self.cklist``.
        Loading is best-effort: a failure is reported and the method still
        tries to pick a cookie with ``self.rand_cookie`` afterwards.

        Yields:
            One ``Request`` for ``http://weibo.cn/tfyiyangqianxi`` whose
            callback is ``self.parse_user_new``.

        Any ``Exception`` raised while picking the cookie or building the
        request is logged at ERROR level and not re-raised, in which case
        nothing is yielded.
        """
        log.msg("start", level=log.INFO)
        try:
            try:
                print("creating a Fetcher")
                self.fetcher = Fetcher()
                self.cookiefiles = self.fetcher.login()
                for filename in self.cookiefiles:
                    self.login_cookies[filename] = self.read_cookie(filename=filename)
                    self.cklist.append(filename)
                print("len: %d" % len(self.login_cookies))
            except Exception as e:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed, and the cause of the
                # failure is recorded instead of being silently dropped.
                print("oh")
                log.msg("Fail to load login cookies: %r" % (e,), level=log.ERROR)

            ck = self.rand_cookie()
            yield Request(
                url="http://weibo.cn/tfyiyangqianxi",
                cookies=self.login_cookies[ck],
                # Do not let the duplicate filter drop the seed request.
                dont_filter=True,
                callback=self.parse_user_new,
                meta={
                    "ck": ck,
                    "nick": u"TFBOYS-易烊千玺",
                    # Do not follow redirects; hand 302 responses to the
                    # callback instead.
                    "dont_redirect": True,
                    "handle_httpstatus_list": [302],
                },
            )
        except Exception as e:
            log.msg("Fail to start", level=log.ERROR)
            log.msg(str(e), level=log.ERROR)
def start_requests(self):
    """Log in, load the login cookies and yield the seed request.

    A ``Fetcher`` is created and asked to log in; every cookie file it
    returns is read with ``self.read_cookie`` and stored in
    ``self.login_cookies`` (keyed by file name), while the file name is
    appended to ``self.cklist``. Loading is best-effort: a failure is
    reported and the method still tries to pick a cookie with
    ``self.rand_cookie`` afterwards.

    Yields:
        One ``Request`` for ``http://weibo.cn/tfyiyangqianxi`` whose callback
        is ``self.parse_user_new``.

    Any ``Exception`` raised while picking the cookie or building the request
    is logged at ERROR level and not re-raised, in which case nothing is
    yielded.
    """
    log.msg("start", level=log.INFO)
    try:
        try:
            print("creating a Fetcher")
            self.fetcher = Fetcher()
            self.cookiefiles = self.fetcher.login()
            for filename in self.cookiefiles:
                self.login_cookies[filename] = self.read_cookie(filename=filename)
                self.cklist.append(filename)
            print("len: %d" % len(self.login_cookies))
        except Exception as e:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed, and the cause of the
            # failure is recorded instead of being silently dropped.
            print("oh")
            log.msg("Fail to load login cookies: %r" % (e,), level=log.ERROR)

        ck = self.rand_cookie()
        yield Request(
            url="http://weibo.cn/tfyiyangqianxi",
            cookies=self.login_cookies[ck],
            # Do not let the duplicate filter drop the seed request.
            dont_filter=True,
            callback=self.parse_user_new,
            meta={
                "ck": ck,
                "nick": u"TFBOYS-易烊千玺",
                # Do not follow redirects; hand 302 responses to the callback
                # instead.
                "dont_redirect": True,
                "handle_httpstatus_list": [302],
            },
        )
    except Exception as e:
        log.msg("Fail to start", level=log.ERROR)
        log.msg(str(e), level=log.ERROR)