Exemple #1
0
class WeiboSpider(scrapy.Spider):
    """Scrapy spider that requests weibo.cn pages with logged-in cookies.

    Cookies are obtained through a ``Fetcher`` login at the start of the
    crawl and cached on the class, keyed by cookie file name.
    """

    name = "weibo"
    # allowed_domains = ["weibo.cn"]

    # Redis connection created once, when the class body is executed.
    conn_r = redis.Redis(host="localhost", port=6379, db="1")

    # NOTE: these containers live on the class, so they are shared by every
    # instance of the spider. They are kept here (rather than moved into
    # __init__) because other code may read them as class attributes.
    login_cookies = {}  # cookie file name -> value returned by read_cookie()
    cklist = []  # cookie file names, in the order they were loaded

    def start_requests(self):
        """Log in, cache the login cookies and yield the seed request.

        The cookie files returned by ``Fetcher().login()`` are read with
        ``self.read_cookie`` and stored in ``self.login_cookies``; their
        names are appended to ``self.cklist``.  A single request for the
        seed profile page is then yielded, using the cookie selected by
        ``self.rand_cookie()`` (defined elsewhere; presumably picks one of
        the loaded cookie names -- confirm against its definition).

        Failures are logged rather than raised: a failure while loading
        cookies is logged and the method still attempts to build the seed
        request; any failure after that is logged as "Fail to start" and
        no request is yielded.
        """
        log.msg("start", level=log.INFO)
        try:
            try:
                print("creating a Fetcher")
                self.fetcher = Fetcher()
                self.cookiefiles = self.fetcher.login()
                for filename in self.cookiefiles:
                    self.login_cookies[filename] = self.read_cookie(filename=filename)
                    self.cklist.append(filename)

                print("len: %d" % len(self.login_cookies))
            except Exception as e:
                # Best effort: keep going with whatever cookies were loaded,
                # but record why loading failed instead of hiding it.
                # %r is used so a non-ASCII error message cannot itself raise.
                log.msg("Fail to load login cookies: %r" % (e,), level=log.ERROR)

            ck = self.rand_cookie()
            yield Request(
                url="http://weibo.cn/tfyiyangqianxi",
                cookies=self.login_cookies[ck],
                dont_filter=True,
                callback=self.parse_user_new,
                # "ck" and "nick" are passed along for the callback.
                # "dont_redirect" + "handle_httpstatus_list" ask Scrapy to hand
                # a 302 response to the callback instead of following it.
                meta={"ck": ck, "nick": u"TFBOYS-易烊千玺", "dont_redirect": True, "handle_httpstatus_list": [302]},
            )
            # Disabled seeds (previously kept here as dead code): hot-topic
            # pages http://weibo.cn/pub/topmblog?page=1..24 via parse_hot, and
            # http://weibo.cn/1768346942/follow via get_user.
        except Exception as e:
            log.msg("Fail to start", level=log.ERROR)
            log.msg(str(e), level=log.ERROR)
Exemple #2
0
    def start_requests(self):
        """Log in, cache the login cookies and yield the seed request.

        The cookie files returned by ``Fetcher().login()`` are read with
        ``self.read_cookie`` and stored in ``self.login_cookies``; their
        names are appended to ``self.cklist``.  A single request for the
        seed profile page is then yielded, using the cookie selected by
        ``self.rand_cookie()`` (defined elsewhere; presumably picks one of
        the loaded cookie names -- confirm against its definition).

        Failures are logged rather than raised: a failure while loading
        cookies is logged and the method still attempts to build the seed
        request; any failure after that is logged as "Fail to start" and
        no request is yielded.
        """
        log.msg("start", level=log.INFO)
        try:
            try:
                print("creating a Fetcher")
                self.fetcher = Fetcher()
                self.cookiefiles = self.fetcher.login()
                for filename in self.cookiefiles:
                    self.login_cookies[filename] = self.read_cookie(filename=filename)
                    self.cklist.append(filename)

                print("len: %d" % len(self.login_cookies))
            except Exception as e:
                # Best effort: keep going with whatever cookies were loaded,
                # but record why loading failed instead of hiding it.
                # %r is used so a non-ASCII error message cannot itself raise.
                log.msg("Fail to load login cookies: %r" % (e,), level=log.ERROR)

            ck = self.rand_cookie()
            yield Request(
                url="http://weibo.cn/tfyiyangqianxi",
                cookies=self.login_cookies[ck],
                dont_filter=True,
                callback=self.parse_user_new,
                # "ck" and "nick" are passed along for the callback.
                # "dont_redirect" + "handle_httpstatus_list" ask Scrapy to hand
                # a 302 response to the callback instead of following it.
                meta={"ck": ck, "nick": u"TFBOYS-易烊千玺", "dont_redirect": True, "handle_httpstatus_list": [302]},
            )
            # Disabled seeds (previously kept here as dead code): hot-topic
            # pages http://weibo.cn/pub/topmblog?page=1..24 via parse_hot, and
            # http://weibo.cn/1768346942/follow via get_user.
        except Exception as e:
            log.msg("Fail to start", level=log.ERROR)
            log.msg(str(e), level=log.ERROR)