def prepare(self, fromtext=False, start_idx=0, end_idx=100):
        """Load the uids this spider should process.

        When ``fromtext`` is false, a client is built with ``_default_redis``
        from the ``REDIS_HOST``/``REDIS_PORT`` settings, stored on ``self.r``,
        and the members of the spider's uid set are returned. When
        ``fromtext`` is true, uids are parsed from the first comma-separated
        field of each selected line of ``uidlist_20140103.txt``.

        Args:
            fromtext: Read uids from the text file instead of the uid set.
            start_idx: First line number to keep (lines are counted from 1).
                Only used when ``fromtext`` is true.
            end_idx: Last line number to keep, inclusive. Only used when
                ``fromtext`` is true.

        Returns:
            Whatever ``self.r.smembers`` returns when ``fromtext`` is false,
            otherwise a list of ints.

        Raises:
            ValueError: If a selected line does not start with an integer.
        """
        if not fromtext:
            host = settings.get("REDIS_HOST", REDIS_HOST)
            port = settings.get("REDIS_PORT", REDIS_PORT)
            self.r = _default_redis(host, port)

            uids_set = UIDS_SET.format(spider=self.name)
            log.msg(format="Load uids from %(uids_set)s", level=log.WARNING, uids_set=uids_set)
            uids = self.r.smembers(uids_set)
            # Truthiness covers both an empty set and an empty list; the
            # previous `uids == []` comparison is never true for a set.
            if not uids:
                log.msg(format="Not load any uids from %(uids_set)s", level=log.WARNING, uids_set=uids_set)
            return uids

        uids = []
        fname = "uidlist_20140103.txt"
        log.msg(format="Load uids from %(uids_set)s", level=log.WARNING, uids_set=fname)
        # The data directory is one level up when the working directory
        # ends with "cron4win".
        if os.getcwd().endswith("cron4win"):
            path = "../test/%s" % fname
        else:
            path = "./test/%s" % fname

        # `with` closes the file even when a line fails to parse, and
        # iterating the handle avoids loading the whole file into memory.
        with open(path, "r") as f:
            for count, line in enumerate(f, 1):
                if count < start_idx:
                    continue
                if count > end_idx:
                    break
                uids.append(int(line.strip().split(",")[0]))

        if not uids:
            log.msg(format="Not load any uids from %(uids_set)s", level=log.WARNING, uids_set=fname)

        return uids
    def prepare(self, fromtext=False, start_idx=0, end_idx=100):
        """Load the uids this spider should process.

        When ``fromtext`` is false, a client is built with ``_default_redis``
        from the ``REDIS_HOST``/``REDIS_PORT`` settings, stored on ``self.r``,
        and the members of the spider's uid set are returned. When
        ``fromtext`` is true, one integer uid is parsed from each selected
        line of ``./source/uid_about_marine``.

        Args:
            fromtext: Read uids from the text file instead of the uid set.
            start_idx: First line number to keep (lines are counted from 1).
                Only used when ``fromtext`` is true.
            end_idx: Last line number to keep, inclusive. Only used when
                ``fromtext`` is true.

        Returns:
            Whatever ``self.r.smembers`` returns when ``fromtext`` is false,
            otherwise a list of ints.

        Raises:
            ValueError: If a selected line is not an integer.
        """
        if not fromtext:
            host = settings.get('REDIS_HOST', REDIS_HOST)
            port = settings.get('REDIS_PORT', REDIS_PORT)
            self.r = _default_redis(host, port)

            uids_set = UIDS_SET.format(spider=self.name)
            log.msg(format='Load uids from %(uids_set)s', level=log.WARNING, uids_set=uids_set)
            uids = self.r.smembers(uids_set)
            # Truthiness covers both an empty set and an empty list; the
            # previous `uids == []` comparison is never true for a set.
            if not uids:
                log.msg(format='Not load any uids from %(uids_set)s', level=log.WARNING, uids_set=uids_set)
            return uids

        uids = []
        fname = 'uid_about_marine'
        log.msg(format='Load uids from %(uids_set)s', level=log.WARNING, uids_set=fname)

        # `with` closes the file even when a line fails to parse.
        with open('./source/%s' % fname) as f:
            for count, line in enumerate(f, 1):
                if count < start_idx:
                    continue
                # Past the requested window: nothing further can match.
                if count > end_idx:
                    break
                uids.append(int(line.strip()))

        if not uids:
            log.msg(format='Not load any uids from %(uids_set)s', level=log.WARNING, uids_set=fname)

        return uids
Exemple #3
0
    def prepare(self, fromtext=False, start_idx=0, end_idx=100):
        """Return the uids to crawl, from the uid set or from a text file.

        With ``fromtext`` false, the host and port are read from settings
        (falling back to ``REDIS_HOST``/``REDIS_PORT``), the client from
        ``_default_redis`` is kept on ``self.r``, and the members of the
        spider's uid set are returned. With ``fromtext`` true, lines
        ``start_idx``..``end_idx`` (1-based, inclusive) of
        ``./source/uid_about_marine`` are each parsed as one integer uid.

        Args:
            fromtext: Select the text-file source when true.
            start_idx: Lowest line number to include (text mode only).
            end_idx: Highest line number to include (text mode only).

        Returns:
            The ``smembers`` result when ``fromtext`` is false, otherwise a
            list of ints.

        Raises:
            ValueError: If a selected line is not an integer.
        """
        if fromtext:
            uids = []
            fname = 'uid_about_marine'
            log.msg(format='Load uids from %(uids_set)s', level=log.WARNING, uids_set=fname)

            # The context manager releases the handle on every exit path,
            # including a parse error part-way through the file.
            with open('./source/%s' % fname) as source:
                for lineno, raw in enumerate(source, 1):
                    if lineno > end_idx:
                        # Past the requested window: stop reading.
                        break
                    if lineno >= start_idx:
                        uids.append(int(raw.strip()))

            if not uids:
                log.msg(format='Not load any uids from %(uids_set)s', level=log.WARNING, uids_set=fname)
            return uids

        host = settings.get('REDIS_HOST', REDIS_HOST)
        port = settings.get('REDIS_PORT', REDIS_PORT)
        self.r = _default_redis(host, port)

        uids_set = UIDS_SET.format(spider=self.name)
        log.msg(format='Load uids from %(uids_set)s', level=log.WARNING, uids_set=uids_set)
        uids = self.r.smembers(uids_set)
        # An empty set is never equal to [], so test emptiness by
        # truthiness to make the warning reachable.
        if not uids:
            log.msg(format='Not load any uids from %(uids_set)s', level=log.WARNING, uids_set=uids_set)

        return uids
Exemple #4
0
    def prepare(self):
        """Load the mids this spider should process.

        Builds a client with ``_default_redis`` from the
        ``REDIS_HOST``/``REDIS_PORT`` settings, stores it on ``self.r``, and
        reads the members of the set named by ``MIDS_SET`` for this spider.

        Returns:
            Whatever ``self.r.smembers`` returns for the spider's mid set.
        """
        host = settings.get('REDIS_HOST', REDIS_HOST)
        port = settings.get('REDIS_PORT', REDIS_PORT)
        self.r = _default_redis(host, port)

        mids_set = MIDS_SET.format(spider=self.name)
        log.msg(format='Load mids from %(mids_set)s', level=log.WARNING, mids_set=mids_set)
        mids = self.r.smembers(mids_set)
        # Truthiness covers both an empty set and an empty list; the
        # previous `mids == []` comparison is never true for a set.
        if not mids:
            log.msg(format='Not load any mids from %(mids_set)s', level=log.WARNING, mids_set=mids_set)

        return mids
    def prepare(self):
        """Load the uids this spider should process.

        Builds a client with ``_default_redis`` from the
        ``REDIS_HOST``/``REDIS_PORT`` settings, stores it on ``self.r``, and
        reads the members of the set named by ``UIDS_SET`` for this spider.

        Returns:
            Whatever ``self.r.smembers`` returns for the spider's uid set.
        """
        host = settings.get('REDIS_HOST', REDIS_HOST)
        port = settings.get('REDIS_PORT', REDIS_PORT)
        self.r = _default_redis(host, port)

        uids_set = UIDS_SET.format(spider=self.name)
        log.msg(format='Load uids from %(uids_set)s', level=log.INFO, uids_set=uids_set)
        uids = self.r.smembers(uids_set)
        # Truthiness covers both an empty set and an empty list; the
        # previous `uids == []` comparison is never true for a set.
        if not uids:
            log.msg(format='Not load any uids from %(uids_set)s', level=log.INFO, uids_set=uids_set)

        return uids
Exemple #6
0
    def prepare(self):
        """Connect to the uid store and return this spider's uids.

        The host and port come from settings, falling back to the
        ``REDIS_HOST``/``REDIS_PORT`` constants. The client returned by
        ``_default_redis`` is kept on ``self.r`` for later use.

        Returns:
            The result of ``self.r.smembers`` on the spider's uid set.
        """
        self.r = _default_redis(
            settings.get('REDIS_HOST', REDIS_HOST),
            settings.get('REDIS_PORT', REDIS_PORT),
        )

        uids_set = UIDS_SET.format(spider=self.name)
        log.msg(format='Load uids from %(uids_set)s', level=log.INFO, uids_set=uids_set)

        uids = self.r.smembers(uids_set)
        # An empty set is never equal to [], so test emptiness by
        # truthiness to make the log message reachable.
        if not uids:
            log.msg(format='Not load any uids from %(uids_set)s', level=log.INFO, uids_set=uids_set)

        return uids
Exemple #7
0
    def prepare(self):
        """Load the weibo ids this spider should process.

        Builds a client with ``_default_redis`` from the
        ``REDIS_HOST``/``REDIS_PORT`` settings, stores it on ``self.r``, and
        reads the members of the set named by ``WEIBOIDS_SET`` for this
        spider.

        Returns:
            Whatever ``self.r.smembers`` returns for the spider's weibo-id
            set.
        """
        host = settings.get('REDIS_HOST', REDIS_HOST)
        port = settings.get('REDIS_PORT', REDIS_PORT)
        self.r = _default_redis(host, port)

        weiboids_set = WEIBOIDS_SET.format(spider=self.name)
        log.msg(format='Load weiboids from %(weiboids_set)s', level=log.INFO, weiboids_set=weiboids_set)
        weiboids = self.r.smembers(weiboids_set)
        # Truthiness covers both an empty set and an empty list; the
        # previous `weiboids == []` comparison is never true for a set.
        if not weiboids:
            log.msg(format='Not load any weiboids from %(weiboids_set)s', level=log.INFO, weiboids_set=weiboids_set)

        return weiboids
    def prepare(self, fromtext=False, start_idx=0, end_idx=100):
        """Return the uids to crawl, from the uid set or from a text file.

        With ``fromtext`` false, the host and port are read from settings
        (falling back to ``REDIS_HOST``/``REDIS_PORT``), the client from
        ``_default_redis`` is kept on ``self.r``, and the members of the
        spider's uid set are returned. With ``fromtext`` true, the first
        comma-separated field of lines ``start_idx``..``end_idx`` (1-based,
        inclusive) of ``uidlist_20140103.txt`` is parsed as an integer uid.

        Args:
            fromtext: Select the text-file source when true.
            start_idx: Lowest line number to include (text mode only).
            end_idx: Highest line number to include (text mode only).

        Returns:
            The ``smembers`` result when ``fromtext`` is false, otherwise a
            list of ints.

        Raises:
            ValueError: If a selected line does not start with an integer.
        """
        if not fromtext:
            host = settings.get('REDIS_HOST', REDIS_HOST)
            port = settings.get('REDIS_PORT', REDIS_PORT)
            self.r = _default_redis(host, port)

            uids_set = UIDS_SET.format(spider=self.name)
            log.msg(format='Load uids from %(uids_set)s',
                    level=log.WARNING,
                    uids_set=uids_set)
            uids = self.r.smembers(uids_set)
            # An empty set is never equal to [], so test emptiness by
            # truthiness to make the warning reachable.
            if not uids:
                log.msg(format='Not load any uids from %(uids_set)s',
                        level=log.WARNING,
                        uids_set=uids_set)
            return uids

        uids = []
        fname = 'uidlist_20140103.txt'
        log.msg(format='Load uids from %(uids_set)s',
                level=log.WARNING,
                uids_set=fname)
        # The data directory is one level up when the working directory
        # ends with 'cron4win'.
        if os.getcwd().endswith('cron4win'):
            path = '../test/%s' % fname
        else:
            path = './test/%s' % fname

        # The context manager releases the handle on every exit path, and
        # iterating the file streams it instead of reading it all at once.
        with open(path, 'r') as source:
            for lineno, raw in enumerate(source, 1):
                if lineno < start_idx:
                    continue
                if lineno > end_idx:
                    break
                uids.append(int(raw.strip().split(',')[0]))

        if not uids:
            log.msg(format='Not load any uids from %(uids_set)s',
                    level=log.WARNING,
                    uids_set=fname)

        return uids
    def prepare(self):
        """Load this spider's uids and keep those within the priority range.

        Builds a client with ``_default_redis`` from the
        ``REDIS_HOST``/``REDIS_PORT`` settings and stores it on ``self.r``.
        The name of the priority hash is stored on
        ``self.uids_priority_set``. Every uid that has no entry in that hash
        is given ``DEFAULT_SCORE``; afterwards only uids whose integer score
        satisfies ``self.gt <= score <= self.lt`` are kept.

        Returns:
            A list of the uids whose priority lies in the inclusive range
            ``[self.gt, self.lt]``.
        """
        host = settings.get('REDIS_HOST', REDIS_HOST)
        port = settings.get('REDIS_PORT', REDIS_PORT)
        self.r = _default_redis(host, port)

        uids_set = UIDS_SET.format(spider=self.name)
        uids_priority_set = UIDS_PRIORITY_SET.format(spider=self.name)
        self.uids_priority_set = uids_priority_set

        log.msg(format='Load uids from %(uids_set)s', level=log.INFO, uids_set=uids_set)
        uids = self.r.smembers(uids_set)
        # Truthiness covers both an empty set and an empty list; the
        # previous `uids == []` comparison is never true for a set.
        if not uids:
            log.msg(format='Not load any uids from %(uids_set)s', level=log.INFO, uids_set=uids_set)

        # Initialize the priority of uids that do not have one yet.
        for uid in uids:
            if not self.r.hexists(self.uids_priority_set, uid):
                self.r.hset(self.uids_priority_set, uid, DEFAULT_SCORE)

        # Filter uids by priority (both bounds inclusive).
        uids = [uid for uid in uids if self.gt <= int(self.r.hget(self.uids_priority_set, uid)) <= self.lt]
        log.msg(format='%(length)s uids between %(gt)s -> %(lt)s will be process', level=log.INFO, length=len(uids), gt=self.gt, lt=self.lt, uids_set=uids_set)
        return uids
    def prepare(self):
        """Return the uids whose stored priority is within ``gt``..``lt``.

        The host and port come from settings, falling back to the
        ``REDIS_HOST``/``REDIS_PORT`` constants; the client returned by
        ``_default_redis`` is kept on ``self.r`` and the priority hash name
        on ``self.uids_priority_set``. Uids missing from the priority hash
        are first assigned ``DEFAULT_SCORE``, then the uid collection is
        filtered by ``self.gt <= score <= self.lt``.

        Returns:
            A list of the uids whose integer priority lies in the inclusive
            range ``[self.gt, self.lt]``.
        """
        host = settings.get('REDIS_HOST', REDIS_HOST)
        port = settings.get('REDIS_PORT', REDIS_PORT)
        self.r = _default_redis(host, port)

        uids_set = UIDS_SET.format(spider=self.name)
        uids_priority_set = UIDS_PRIORITY_SET.format(spider=self.name)
        self.uids_priority_set = uids_priority_set

        log.msg(format='Load uids from %(uids_set)s',
                level=log.WARNING,
                uids_set=uids_set)
        uids = self.r.smembers(uids_set)
        # An empty set is never equal to [], so test emptiness by
        # truthiness to make the warning reachable.
        if not uids:
            log.msg(format='Not load any uids from %(uids_set)s',
                    level=log.WARNING,
                    uids_set=uids_set)

        # Initialize the priority of uids that do not have one yet.
        for uid in uids:
            if not self.r.hexists(self.uids_priority_set, uid):
                self.r.hset(self.uids_priority_set, uid, DEFAULT_SCORE)

        # Filter uids by priority (both bounds inclusive).
        uids = [
            uid for uid in uids if
            self.gt <= int(self.r.hget(self.uids_priority_set, uid)) <= self.lt
        ]
        log.msg(
            format='%(length)s uids between %(gt)s -> %(lt)s will be process',
            level=log.INFO,
            length=len(uids),
            gt=self.gt,
            lt=self.lt,
            uids_set=uids_set)
        return uids