Example 1
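These excerpts assume shared context that is not shown: a Redis client r, a SQL cursor, several constants, and a few helpers. Below is a minimal sketch of that context; all values and stub bodies are hypothetical stand-ins, not the original implementation (WTree is sketched after Example 5).

import random
import time

import pyodbc
import redis

r = redis.Redis()  # connection details are hypothetical
cursor = pyodbc.connect('DSN=posts').cursor()  # hypothetical DSN

# tuning constants; every value here is a hypothetical stand-in
MIN_TREE_SIZE = 50             # grow the lean tree below this many posts
PAGE_SIZE = 20                 # posts per feed page
FAT_PERCENT = 20               # % of page slots drawn from the fat tree
TIME_BLOCK_SIZE = 3_600_000    # posts are bucketed into 1-hour blocks (ms)
INCEPTION = 1546300800000      # ms timestamp of the oldest fetchable block
MAX_SEEN_POSTS = 10_000        # flush the seen set beyond this size
HOT_FACTOR_EXPIRATION = 300    # seconds before a cached hot factor expires


def _construct_post_id(creator, creation_time):
    # post ids are in the form [creator]_[creationTime]
    return f'{creator}_{creation_time}'


def _deconstruct_post_id(post_id):
    creator, creation_time = post_id.rsplit('_', 1)
    return creator, int(creation_time)


def calculate_hot_factor(creation_time, votes, views):
    # hypothetical scoring formula; the real one lives elsewhere
    age_hours = (int(time.time() * 1000) - creation_time) / 3_600_000
    return int((votes + views) / (1 + age_hours))


def is_cold(creation_time, hot_factor):
    # hypothetical cutoff separating hot posts from cold ones
    return hot_factor < 10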
def build_tree(user, town, refresh):
    # trees store post ids.
    # post ids are in the form of [creator]_[creationTime]
    lean = WTree()
    fat = WTree()

    p = r.pipeline()
    posts = r.smembers(f'user:{user}:session:tree')
    posts_list = []

    for post in posts:
        # smembers returns bytes; decode before building key names
        post = post.decode('utf-8')
        posts_list.append(post)
        p.get(f'post:{post}:hot_factor')

    hot_factors = [None if hf is None else int(hf) for hf in p.execute()]
    _calculate_hot_factors_batch(posts_list, hot_factors)

    # partition: cold posts go to the fat tree, hot posts stay lean
    for i, post in enumerate(posts_list):
        _, creation_time = _deconstruct_post_id(post)

        if is_cold(creation_time, hot_factors[i]):
            fat.add(post, hot_factors[i])
        else:
            lean.add(post, hot_factors[i])

    # if the lean tree is too small, grow it with more posts
    if lean.size() < MIN_TREE_SIZE:
        _grow_tree(user, town, lean, fat, refresh)
    
    return lean, fat
Example 2
def get_feed_page(user, lean, fat, fat_percentage=FAT_PERCENT, page_size=PAGE_SIZE):
    posts = []
    p = r.pipeline()

    for i in range(page_size):
        coin = random.randint(1, 100)
        if coin <= fat_percentage and fat.size() > 0:
            # sampling without replacement ensures
            # no duplicate posts per page
            post = fat.pop()
        elif lean.size() > 0:
            post = lean.pop()
        elif fat.size() > 0:
            post = fat.pop()
        else:
            break
        
        # remove from tree
        p.srem(f'user:{user}:session:tree', post)    
        posts.append(post)

    if len(posts) > 0:
        p.sadd(f'user:{user}:session:seen', *posts)
        p.execute()

    return posts
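Putting the two entry points together, serving a feed session might look like the sketch below; serve_session and its signature are hypothetical, not part of the original code.

def serve_session(user, town, refresh=False):
    # build the session trees, growing them from cache/DB if needed
    lean, fat = build_tree(user, town, refresh)

    # page through the feed until both trees run dry
    while True:
        page = get_feed_page(user, lean, fat)
        if not page:
            break
        yield page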
Example 3
def _grow_tree(user, town, lean, fat, refresh):
    now = int(time.time() * 1000)
    # first fetch most recent posts (in current epoch)
    epoch = now - now % TIME_BLOCK_SIZE
    p = r.pipeline()

    # determine whether to refresh session state
    if refresh:
        tail = epoch - TIME_BLOCK_SIZE
        seen = set()
        p.delete(f'user:{user}:session:seen')
        p.delete(f'user:{user}:session:tail')
    else:
        tail = r.get(f'user:{user}:session:tail')
        if tail is None:
            tail = epoch - TIME_BLOCK_SIZE
        else:
            tail = int(tail)

        seen = r.smembers(f'user:{user}:session:seen')
        if len(seen) >= MAX_SEEN_POSTS:
            # flush seen posts
            p.delete(f'user:{user}:session:seen')
            seen = set()
    
    while lean.size() < MIN_TREE_SIZE and epoch >= INCEPTION:
        posts, hot_factors = _fetch_posts(epoch, town)

        for i in range(len(posts)):
            post = posts[i]
            # skip posts the user has already seen
            # (seen holds raw bytes from smembers)
            if post.encode('utf-8') in seen:
                continue

            _, creation_time = _deconstruct_post_id(post)
            
            if is_cold(creation_time, hot_factors[i]):
                fat.add(post, hot_factors[i])
            else:
                lean.add(post, hot_factors[i])
                p.sadd(f'user:{user}:session:tree', post)

        # continue fetching older posts
        epoch = tail
        if lean.size() < MIN_TREE_SIZE and epoch >= INCEPTION:
            tail -= TIME_BLOCK_SIZE
        
    p.set(f'user:{user}:session:tail', tail)
    p.execute()
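The block arithmetic in _grow_tree is easier to follow with small numbers; a quick illustration, with all values made up and TIME_BLOCK_SIZE shrunk to 1000 ms:

TIME_BLOCK_SIZE = 1000  # ms, illustrative only

now = 5432
epoch = now - now % TIME_BLOCK_SIZE   # 5000: start of the current block
tail = epoch - TIME_BLOCK_SIZE        # 4000: the next (older) block

# each pass of the grow loop fetches one block, newest first:
#   pass 1 fetches [5000, 5999]; epoch becomes 4000, tail becomes 3000
#   pass 2 fetches [4000, 4999]; epoch becomes 3000, and tail only
#   drops to 2000 if a third pass is still needed
# whichever pass is last, the persisted tail marks the next unfetched block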
Example 4
def _fetch_posts(epoch, town, cache=True):
    p = r.pipeline()
    posts = r.smembers(f'posts:{town}:{epoch}')
    hot_factors = []

    if len(posts) != 0:
        # posts are stored as a Redis set, which (naively) lets
        # concurrent workers cache the same block without
        # creating duplicate posts
        posts = list(posts)
        for i in range(len(posts)):
            posts[i] = posts[i].decode('utf-8')
            p.get(f'post:{posts[i]}:hot_factor')

        hot_factors = [None if elem is None else int(elem) for elem in p.execute()]
        _calculate_hot_factors_batch(posts, hot_factors)
        return posts, hot_factors

    posts = []
    cursor.execute('SELECT creator, creationTime, votes, "views" \
        FROM Posts WHERE town=? \
        AND creationTime BETWEEN ? and ?', town, epoch, epoch + TIME_BLOCK_SIZE - 1)
    
    row = cursor.fetchone()
    while row:
        post_id = _construct_post_id(row[0], row[1])
        posts.append(post_id)

        hf = calculate_hot_factor(row[1], row[2], row[3])
        hot_factors.append(hf)

        if cache:
            p.set(f'post:{post_id}:votes', row[2])
            p.set(f'post:{post_id}:views', row[3])
            p.set(f'post:{post_id}:hot_factor', hf)
            # expire so the hot factor is eventually recalculated
            p.expire(f'post:{post_id}:hot_factor', HOT_FACTOR_EXPIRATION)

        row = cursor.fetchone()
    
    if cache:
        if len(posts) != 0:
            p.sadd(f'posts:{town}:{epoch}', *posts)
        p.execute()
    
    return posts, hot_factors
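For reference, the Redis key layout shared by these functions; the key names come straight from the code, and the descriptions summarize how each is used:

# post:{post_id}:votes        int, cached copy of the Posts row
# post:{post_id}:views        int, cached copy of the Posts row
# post:{post_id}:hot_factor   int, expires after HOT_FACTOR_EXPIRATION
# posts:{town}:{epoch}        set of post ids cached for one time block
# user:{user}:session:tree    set of post ids backing the session trees
# user:{user}:session:seen    set of post ids already served to the user
# user:{user}:session:tail    ms timestamp of the next unfetched block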
Example 5
def _calculate_hot_factors_batch(posts, hot_factors):
    # posts at these indices don't have hot factors
    # in cache (expired or never calculated)
    indices = []
    p = r.pipeline()

    for i in range(len(posts)):
        post = posts[i]
        if hot_factors[i] is None:
            indices.append(i)
            p.get(f'post:{post}:votes')
            p.get(f'post:{post}:views')

    stats = p.execute()
    inputs = []
    for i in range(0, len(stats), 2):
        post = posts[indices[i//2]]
        creator, creation_time = _deconstruct_post_id(post)

        # if votes or views doesn't exist
        if stats[i] is None or stats[i+1] is None:
            # kinda bothers me that there is no way to "batch"
            # SELECTs
            cursor.execute('SELECT votes, "views" FROM Posts\
                WHERE creator=? AND creationTime=?', creator, creation_time)
            row = cursor.fetchone()
            stats[i], stats[i+1] = row[0], row[1]
            p.set(f'post:{post}:votes', row[0])
            p.set(f'post:{post}:views', row[1])
        else:
            stats[i], stats[i+1] = int(stats[i]), int(stats[i+1])

        hf = calculate_hot_factor(creation_time, stats[i], stats[i+1])
        hot_factors[indices[i//2]] = hf

        p.set(f'post:{post}:hot_factor', hf)
        # reuse hot factor up to HOT_FACTOR_EXPIRATION
        p.expire(f'post:{post}:hot_factor', HOT_FACTOR_EXPIRATION)

    p.execute()
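WTree itself is not shown in these excerpts. From how it is used above (add(item, weight), size(), and pop() returning an element without replacement, with the "sampling" comment in Example 2 suggesting weight-proportional draws), here is a minimal list-backed stand-in; the real structure is presumably tree-shaped for sub-linear pops.

import random

class WTree:
    # hypothetical stand-in: O(n) pop; a real weighted tree does O(log n)
    def __init__(self):
        self._items = []
        self._weights = []

    def add(self, item, weight):
        # guard against zero/negative weights so sampling stays valid
        self._items.append(item)
        self._weights.append(max(weight, 1e-9))

    def size(self):
        return len(self._items)

    def pop(self):
        # weighted sampling without replacement: pick an index with
        # probability proportional to its weight, then remove it
        i = random.choices(range(len(self._items)), weights=self._weights)[0]
        self._weights.pop(i)
        return self._items.pop(i)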