Ejemplo n.º 1
0
    def reindex(self, BATCH_COUNT=50):
        """Rebuild the realtime index from the rows cached in Redis.

        ``self.index`` is truncated first; every Redis key matching ``id_*``
        is then read back and re-inserted in batches of ``BATCH_COUNT`` rows.
        Each key is a list laid out the way ``update`` writes it:
        ``[full_hash, hit_count, id, json]``.

        Rows keep the document id already stored in Redis instead of drawing
        a fresh ``GLOBAL_ID``.  ``update`` reuses that stored id when it
        replaces a row, so indexing under a new id here would leave a stale
        duplicate behind on the next change.

        Keys that cannot be read or decoded are reported and skipped, and a
        failing batch is reported without aborting the remaining batches.

        :param BATCH_COUNT: maximum number of rows per insert batch; must be
            at least 1.
        :returns: number of rows whose batch was sent without raising.
        :raises ValueError: if ``BATCH_COUNT`` is smaller than 1 (checked
            before anything is truncated).
        """
        if BATCH_COUNT < 1:
            raise ValueError('BATCH_COUNT must be >= 1, got %r' % (BATCH_COUNT,))

        # NOTE(review): this run() gets no host while the batches below pass
        # self.SPHINX_HOST -- confirm both reach the same server.
        sphinxql.truncate(self.index).run()
        # Single-argument print(...) emits the same text under Python 2.
        print('rt truncated.')

        all_keys = self.rc.keys('id_*')
        total = len(all_keys)
        print('%d rows in redis.' % total)

        done = 0
        for start in range(0, total, BATCH_COUNT):
            ql = sphinxql.insert(self.index)
            queued = 0
            for key in all_keys[start:start + BATCH_COUNT]:
                try:
                    # One round trip for slots 1..3; a list that is too short
                    # fails the unpacking and is skipped like any bad row.
                    hit_count, gid, raw_json = self.rc.lrange(key, 1, 3)
                    row = json.loads(raw_json)
                    row['id'] = int(gid)
                    row['hit_count'] = hit_count
                    row['json'] = raw_json
                    ql.add(row)
                except Exception as exc:
                    # Best effort: one bad row must not stop the rebuild.
                    print('ADD error: %s (%s)' % (key, exc))
                else:
                    queued += 1

            # Nothing queued means nothing to send for this page.
            if queued:
                try:
                    ql.run(self.SPHINX_HOST)
                except Exception:
                    import traceback
                    traceback.print_exc()
                else:
                    # Count rows only once their batch actually went out.
                    done += queued
            print('%d / %d' % (done, total))
        print('finished.')
        return done
Ejemplo n.º 2
0
    def reindex(self, BATCH_COUNT=50):
        """Rebuild the realtime index from the rows cached in Redis.

        ``self.index`` is truncated first; every Redis key matching ``id_*``
        is then read back and re-inserted in batches of ``BATCH_COUNT`` rows.
        Each key is a list laid out the way ``update`` writes it:
        ``[full_hash, hit_count, id, json]``.

        Rows keep the document id already stored in Redis instead of drawing
        a fresh ``GLOBAL_ID``.  ``update`` reuses that stored id when it
        replaces a row, so indexing under a new id here would leave a stale
        duplicate behind on the next change.

        Keys that cannot be read or decoded are reported and skipped, and a
        failing batch is reported without aborting the remaining batches.

        :param BATCH_COUNT: maximum number of rows per insert batch; must be
            at least 1.
        :returns: number of rows whose batch was sent without raising.
        :raises ValueError: if ``BATCH_COUNT`` is smaller than 1 (checked
            before anything is truncated).
        """
        if BATCH_COUNT < 1:
            raise ValueError('BATCH_COUNT must be >= 1, got %r' % (BATCH_COUNT,))

        # NOTE(review): this run() gets no host while the batches below pass
        # self.SPHINX_HOST -- confirm both reach the same server.
        sphinxql.truncate(self.index).run()
        # Single-argument print(...) emits the same text under Python 2.
        print('rt truncated.')

        all_keys = self.rc.keys('id_*')
        total = len(all_keys)
        print('%d rows in redis.' % total)

        done = 0
        for start in range(0, total, BATCH_COUNT):
            ql = sphinxql.insert(self.index)
            queued = 0
            for key in all_keys[start:start + BATCH_COUNT]:
                try:
                    # One round trip for slots 1..3; a list that is too short
                    # fails the unpacking and is skipped like any bad row.
                    hit_count, gid, raw_json = self.rc.lrange(key, 1, 3)
                    row = json.loads(raw_json)
                    row['id'] = int(gid)
                    row['hit_count'] = hit_count
                    row['json'] = raw_json
                    ql.add(row)
                except Exception as exc:
                    # Best effort: one bad row must not stop the rebuild.
                    print('ADD error: %s (%s)' % (key, exc))
                else:
                    queued += 1

            # Nothing queued means nothing to send for this page.
            if queued:
                try:
                    ql.run(self.SPHINX_HOST)
                except Exception:
                    import traceback
                    traceback.print_exc()
                else:
                    # Count rows only once their batch actually went out.
                    done += queued
            print('%d / %d' % (done, total))
        print('finished.')
        return done
Ejemplo n.º 3
0
    def update(self, rows):
        """Sync ``rows`` into Redis and send the matching index batches.

        Every row must provide ``id_hash``, ``full_hash`` and ``json``.  The
        Redis list stored under ``id_hash`` is laid out as
        ``[full_hash, hit_count, id, json]``:

        * unknown ``id_hash`` -- a new id is taken from ``GLOBAL_ID``, the
          list is created and the row is queued for insert with
          ``hit_count`` 0;
        * known ``id_hash`` with a different ``full_hash`` -- the stored
          hash, hit count (+1) and json are overwritten and the row is
          queued for replace under its existing id;
        * known ``id_hash`` with the same ``full_hash`` -- the row is
          skipped entirely (its TTL is not refreshed either).

        Queued rows get ``id`` and ``hit_count`` written into them and their
        Redis key's expiry is set to ``self.EXPIRE_TTL``.  Each batch is sent
        to ``self.SPHINX_HOST`` only when it received at least one row.

        :param rows: iterable of mutable row dicts.
        :returns: ``(insert_count, replace_count)`` tuple.
        """
        pending_inserts = sphinxql.insert(self.index)
        pending_replaces = sphinxql.replace(self.index)
        inserted = 0
        replaced = 0

        for row in rows:
            id_hash = row['id_hash']
            full_hash = row['full_hash']
            payload = row['json']

            if not self.rc.exists(id_hash):
                gid = self.rc.incr('GLOBAL_ID')
                hit_count = 0
                # Build the list slot by slot: hash, hit count, id, json.
                for value in (full_hash, 0, gid, payload):
                    self.rc.rpush(id_hash, value)
                # Stamped here as well as below, so the row already carries
                # its id if the expire call raises.
                row['id'] = gid
                target = pending_inserts
                inserted += 1
            else:
                stored_hash, stored_hits, gid = self.rc.lrange(id_hash, 0, 2)
                if full_hash == stored_hash:
                    continue
                hit_count = int(stored_hits) + 1
                self.rc.lset(id_hash, 0, full_hash)
                self.rc.lset(id_hash, 1, hit_count)
                # Slot 2 (the id) is left alone: ids never change.
                self.rc.lset(id_hash, 3, payload)
                target = pending_replaces
                replaced += 1

            self.rc.expire(id_hash, self.EXPIRE_TTL)
            row['id'] = gid
            row['hit_count'] = hit_count
            target.add(row)

        if inserted:
            pending_inserts.run(self.SPHINX_HOST)
        if replaced:
            pending_replaces.run(self.SPHINX_HOST)
        return inserted, replaced
Ejemplo n.º 4
0
    def update(self, rows):
        """Sync ``rows`` into Redis and send the matching index batches.

        Every row must provide ``id_hash``, ``full_hash`` and ``json``.  The
        Redis list stored under ``id_hash`` is laid out as
        ``[full_hash, hit_count, id, json]``:

        * unknown ``id_hash`` -- a new id is taken from ``GLOBAL_ID``, the
          list is created and the row is queued for insert with
          ``hit_count`` 0;
        * known ``id_hash`` with a different ``full_hash`` -- the stored
          hash, hit count (+1) and json are overwritten and the row is
          queued for replace under its existing id;
        * known ``id_hash`` with the same ``full_hash`` -- the row is
          skipped entirely (its TTL is not refreshed either).

        Queued rows get ``id`` and ``hit_count`` written into them and their
        Redis key's expiry is set to ``self.EXPIRE_TTL``.  Each batch is sent
        to ``self.SPHINX_HOST`` only when it received at least one row.

        :param rows: iterable of mutable row dicts.
        :returns: ``(insert_count, replace_count)`` tuple.
        """
        pending_inserts = sphinxql.insert(self.index)
        pending_replaces = sphinxql.replace(self.index)
        inserted = 0
        replaced = 0

        for row in rows:
            id_hash = row['id_hash']
            full_hash = row['full_hash']
            payload = row['json']

            if not self.rc.exists(id_hash):
                gid = self.rc.incr('GLOBAL_ID')
                hit_count = 0
                # Build the list slot by slot: hash, hit count, id, json.
                for value in (full_hash, 0, gid, payload):
                    self.rc.rpush(id_hash, value)
                # Stamped here as well as below, so the row already carries
                # its id if the expire call raises.
                row['id'] = gid
                target = pending_inserts
                inserted += 1
            else:
                stored_hash, stored_hits, gid = self.rc.lrange(id_hash, 0, 2)
                if full_hash == stored_hash:
                    continue
                hit_count = int(stored_hits) + 1
                self.rc.lset(id_hash, 0, full_hash)
                self.rc.lset(id_hash, 1, hit_count)
                # Slot 2 (the id) is left alone: ids never change.
                self.rc.lset(id_hash, 3, payload)
                target = pending_replaces
                replaced += 1

            self.rc.expire(id_hash, self.EXPIRE_TTL)
            row['id'] = gid
            row['hit_count'] = hit_count
            target.add(row)

        if inserted:
            pending_inserts.run(self.SPHINX_HOST)
        if replaced:
            pending_replaces.run(self.SPHINX_HOST)
        return inserted, replaced