def get(self):
    """Refresh the trending-stream ranking and send any due digest emails.

    Steps, in order:
      1. Drop queue entries older than one hour, decrementing the matching
         per-stream counter in the stat mapping (a counter that reaches
         zero is removed).
      2. Build ``(stream id, count)`` pairs for every stream under
         ``getStreamKey()`` and order them by count, highest first.
      3. Persist the pruned queue, the stat mapping and the ranking.
      4. Resolve the leading ranking entries to stream entities, stopping
         once ``Trending_Stream_Handler.digest_num`` have been collected.
      5. Advance every digest counter; when one reaches its bound, reset it
         and mail the digest to each user subscribed at that frequency.

    The full ranking is finally written to the response as ``str(...)``.
    """
    queue = getQueue()
    counts = getStat()
    now = datetime.now()
    max_age = timedelta(hours=1)

    # Queue entries are read as (stream id, timestamp); index 0 is examined
    # first and removed with popleft() once it is older than ``max_age``.
    while len(queue) > 0:
        viewed_id, viewed_at = queue[0][0], queue[0][1]
        if now - viewed_at <= max_age:
            break
        if viewed_id in counts:
            remaining = counts[viewed_id] - 1
            if remaining == 0:
                del counts[viewed_id]
            else:
                counts[viewed_id] = remaining
        else:
            logging.error(
                "Fatal error with trending stream data. Data in queue but does not in stat."
            )
        queue.popleft()

    # Streams are fetched most-viewed first; the sort below is stable, so
    # that order is what breaks ties between equal counts.
    ranking = []
    for stream in Stream.query(
            ancestor=getStreamKey()).order(-Stream.viewCount):
        stream_id = stream.key.id()
        ranking.append(
            (stream_id, counts[stream_id] if stream_id in counts else 0))
    ranking = sorted(ranking, key=lambda entry: entry[1], reverse=True)

    updateQueue(queue)
    updateStat(counts)
    updateTrendingStreams(ranking)

    # Collect the entities for the digest, skipping ids that no longer
    # resolve to a stream.
    digest = []
    for entry in ranking:
        stream = Stream.get_by_id(entry[0], getStreamKey())
        if not stream:
            continue
        digest.append((stream, entry[1]))
        if len(digest) == Trending_Stream_Handler.digest_num:
            break

    # Each call counts as one tick for every frequency; a digest goes out
    # on the tick where the counter reaches the configured bound.
    counters = Trending_Stream_Handler.email_digest_counter
    for frequency in counters.keys():
        counters[frequency] += 1
        if counters[frequency] != Trending_Stream_Handler.email_digest_bound[
                frequency]:
            continue
        counters[frequency] = 0
        subscribers = UserEmailSetting.query(
            UserEmailSetting.email_update_rate == frequency)
        for setting in subscribers:
            emails.sendTrendingStreamEmails(setting.user, setting.name,
                                            digest)

    self.response.write(str(ranking))
def getTrendingStreams():
    """Return the trending ranking as a list of ``(stream id, score)`` pairs.

    The ranking cached in memcache under ``'TrendingStat'`` is returned when
    it is present and non-empty. Otherwise a fallback ranking is built from
    every stream under ``getStreamKey()`` with a score of 0, stored through
    ``updateTrendingStreams`` and returned.

    The fallback lists the most-viewed streams first. That matches the
    ordering the trending handler's ``get`` uses when it builds the same
    list, so callers that take the first N entries get the most-viewed
    streams rather than the least-viewed ones.
    """
    cached = memcache.get('TrendingStat')
    if cached:
        return cached

    # Descending view count: with every score at 0 this order is the only
    # thing that decides which streams come first.
    by_views = Stream.query(ancestor=getStreamKey()).order(-Stream.viewCount)
    trending = [(stream.key.id(), 0) for stream in by_views]
    updateTrendingStreams(trending)
    return trending
def getTopStreams(max_num):
    """Return up to ``max_num`` ``(stream, score)`` pairs from the ranking.

    Entries are taken in the order given by ``getTrendingStreams()``; ids
    that no longer resolve to a stream entity are skipped and do not count
    towards the limit.

    Args:
        max_num: Maximum number of pairs to return. ``0`` yields an empty
            list. The limit is an equality check against the running
            count, so a value that the count can never equal (for example
            a negative number) returns every resolvable stream, as before.

    Returns:
        A list of ``(stream entity, score)`` tuples.
    """
    # The limit check runs after the count has already been incremented, so
    # without this guard a limit of zero would never match and the whole
    # ranking would be returned.
    if max_num == 0:
        return []

    top_streams = []
    for entry in getTrendingStreams():
        stream = Stream.get_by_id(entry[0], getStreamKey())
        if not stream:
            continue
        top_streams.append((stream, entry[1]))
        if len(top_streams) == max_num:
            break
    return top_streams
def getTrie():
    """Return the search trie, building and storing it when it is not cached.

    A truthy value cached in memcache under ``'search'`` is returned as is.
    Otherwise a new ``Trie`` is filled from every stream under
    ``getStreamKey()``: each token that ``formatSearchContent`` produces for
    the stream's name, its tag, and the text preceding ``'@'`` in
    ``stream.user`` is added with the stream's id. The finished trie is
    handed to ``updateTrie`` and returned.
    """
    cached = memcache.get('search')
    if cached:
        return cached

    def searchable_texts(stream):
        # Yielded lazily so each field is read only when its turn comes.
        yield stream.name
        yield stream.tag
        # Empty string when stream.user contains no '@'.
        yield "".join(re.findall(r'(.+)@', stream.user))

    index = Trie()
    for stream in Stream.query(ancestor=getStreamKey()):
        stream_id = stream.key.id()
        for text in searchable_texts(stream):
            for token in formatSearchContent(text):
                index.add(token, stream_id)
    updateTrie(index)
    return index
def search_streams(query_words):
    """Return the streams matching every query word, most relevant first.

    ``query_words`` is normalised with ``formatSearchContent``. For each
    resulting word, every indexed string returned by
    ``trie.searchSubstring(word)`` contributes its stream ids to that word's
    match set. A stream is returned only if it appears in the match set of
    every word.

    Relevance is accumulated per stream id: each (word, matched string)
    pair adds ``len(word) / len(string)``, so near-exact matches weigh more
    than short fragments of long strings.

    Args:
        query_words: Raw query text, passed to ``formatSearchContent``.

    Returns:
        A list of stream entities sorted by descending relevance. The list
        is empty when the query produces no words or nothing matches.
    """
    trie = getTrie()
    relevance = {}
    matches_per_word = []
    for word in formatSearchContent(query_words):
        matched = set()
        for string in trie.searchSubstring(word):
            string_sids = trie.get(string)
            for sid in string_sids:
                relevance[sid] = relevance.get(sid, 0.0) + float(
                    len(word)) / len(string)
            matched.update(string_sids)
        matches_per_word.append(matched)

    # A query with no words has nothing to intersect; folding an empty
    # list of sets would raise instead of returning "no results".
    if not matches_per_word:
        return []

    common_sids = set.intersection(*matches_per_word)

    # The trie may be served from cache and can reference streams that have
    # since been deleted, so drop ids that no longer resolve.
    streams = []
    for sid in common_sids:
        stream = Stream.get_by_id(sid, getStreamKey())
        if stream is not None:
            streams.append(stream)

    return sorted(streams, key=lambda s: -relevance[s.key.id()])