def get_keyword_cloud(self, minimum_count=None, limit=50, minimum_pct=0,
                      maximum_pct=100, start_time=None, end_time=None,
                      timespan=None):
    """Return a keyword cloud as a sorted list of ``(keyword, pct)`` tuples.

    Each keyword's count is scaled into the ``minimum_pct`` ..
    ``maximum_pct`` range relative to the other retained keywords. The
    keyword text is passed through ``HTMLParser().escape`` and the
    percentage is rounded.

    Parameters:
        minimum_count: forwarded unchanged to ``self._storage.get_keywords``.
        limit: number of top counts to retain. Keywords whose count ties
            with the lowest retained count are kept as well, so the result
            may hold more than ``limit`` entries.
        minimum_pct / maximum_pct: bounds of the output percentage range.
        start_time / end_time: window bounds as epoch seconds. A missing
            ``start_time`` defaults to one hour before ``end_time``; a
            missing ``end_time`` defaults to now.
        timespan: window length in seconds ending now; when given it
            overrides ``start_time`` and ``end_time``.

    Returns:
        A list of ``(escaped_keyword, rounded_pct)`` tuples, sorted.
        Empty when the storage yields no keywords or ``limit`` leaves
        nothing to show.
    """
    # Read the clock once so both window bounds refer to the same instant.
    now = int(time.time())
    if timespan is not None:
        start_time = now - int(timespan)
        end_time = now
    elif start_time is None and end_time is None:
        start_time = now - 3600
        end_time = now
    elif start_time is None:
        start_time = int(end_time) - 3600
    elif end_time is None:
        end_time = now

    keywords = self._storage.get_keywords(minimum_count=minimum_count,
                                          start_time=start_time,
                                          end_time=end_time)
    if not keywords:
        return []

    # sorted() works on both lists and dict views, unlike values().sort().
    counts = sorted(keywords.values(), reverse=True)
    cutoff = 0
    if limit < len(counts):
        counts = counts[:limit]
        if not counts:
            # The limit removed every count; there is no cutoff to read.
            return []
        cutoff = counts[-1]

    if len(counts) == 1:
        total = counts[0]
        baseline = 0
    else:
        # Measure every count relative to the cutoff so the smallest
        # retained keyword lands on minimum_pct.
        total = sum(counts) - (cutoff * len(counts))
        baseline = cutoff

    pct_range = maximum_pct - minimum_pct
    parser = HTMLParser()
    cloud = []
    for keyword in sorted(keywords):
        count = keywords[keyword]
        if cutoff and count < cutoff:
            continue
        if total:
            pct = ((count - baseline) / float(total) * pct_range
                   + minimum_pct)
        else:
            # All retained counts are equal (or zero): nothing to scale
            # against, so avoid dividing by zero and use the lower bound.
            pct = float(minimum_pct)
        cloud.append((parser.escape(keyword), round(pct)))
    return sorted(cloud)
def list_searches(self, keyword=None, limit=None):
    """Collect the escaped search phrases found in the recorded hits.

    Only hit sources that start with ``'searches'`` and contain a
    ``': '`` separator are considered; the phrase is the text after that
    separator. When ``keyword`` is given, only phrases containing it are
    kept. When ``limit`` is given, at most that many phrases are returned.
    """
    # dict.fromkeys collapses duplicate source strings.
    unique_sources = dict.fromkeys(hit['source'] for hit in self._hits)
    escaper = HTMLParser()
    matches = []
    for source in unique_sources:
        if not source.startswith('searches'):
            continue
        sep_at = source.find(': ')
        if sep_at <= 0:
            continue
        phrase = source[sep_at + 2:]
        if keyword is not None and keyword not in phrase:
            continue
        matches.append(escaper.escape(phrase))
    if limit is None:
        return matches
    return matches[:limit]
def list_searches(self, keyword=None, limit=None):
    """List the escaped search phrases extracted from referrer URLs.

    Referrers come from ``self.list_referrers``. ``keyword``, when given,
    is forwarded as its ``refsearch`` argument. ``limit`` is forwarded
    as well, so it bounds the number of referrers examined; fewer
    phrases may be returned if some referrers carry no search query.

    Each referrer is split with ``urlparse.urlsplit`` and handed to
    ``URLParser.searchquery``. When that returns a result, its second
    element is taken as the phrase and escaped with
    ``HTMLParser().escape``.
    """
    phrases = []
    urlparser = URLParser(self._conf)
    htmlparser = HTMLParser()
    if keyword is None:
        referrers = self.list_referrers(limit=limit)
    else:
        referrers = self.list_referrers(limit=limit, refsearch=keyword)
    for ref in referrers:
        # Check for None before splitting: list(urlsplit(...)) is never
        # None, so a check placed after the split can never fire.
        if ref is None:
            continue
        parts = list(urlparse.urlsplit(ref))
        querydata = urlparser.searchquery(parts)
        if querydata is None:
            continue
        phrases.append(htmlparser.escape(querydata[1]))
    return phrases