def keywords(self):
    """Return the list of keywords and bigram phrases for this item's clips.

    Keywords are drawn from the captions of all related clips, filtered by
    log-likelihood against ``LL_THRESHOLD``; bigrams are appended as
    space-joined phrases. The result is cached in ``keyword_cache`` as a
    ';'-separated string, with ``keyword_cache_time`` recording when.

    Returns:
        list of unicode keyword/phrase strings.
    """
    # NOTE(review): cache is reused when keyword_cache_time < updated, i.e.
    # when the cache looks OLDER than the last update -- this condition may
    # be inverted; confirm the semantics of ``updated`` before changing it.
    if self.keyword_cache_time and self.keyword_cache_time < self.updated:
        return self.keyword_cache.split(";")

    text = (clip.caption_text for clip in self.clip_set.all())
    keywords, ngrams = keywords_and_ngrams(text)
    # List comprehensions instead of map()/filter(): on Python 3 map()
    # returns an iterator with no .extend(), so the original chain would
    # raise AttributeError below. Behavior is unchanged on Python 2.
    keywords = [word for word, ll in keywords if ll > LL_THRESHOLD]
    keywords.extend(u" ".join(words) for words, _count in ngrams)

    self.keyword_cache = u";".join(keywords)
    self.keyword_cache_time = timezone.now()
    self.save()
    return keywords
def keywords(self):
    """Compute (or fetch from cache) the keyword list for this item.

    Scans the caption text of every related clip, keeps single keywords
    whose log-likelihood exceeds ``LL_THRESHOLD``, appends each bigram as a
    space-joined phrase, and persists the result to ``keyword_cache`` as a
    ';'-joined string.

    Returns:
        list of unicode keyword/phrase strings.
    """
    # NOTE(review): the freshness check returns the cache when it is older
    # than ``updated`` -- this looks inverted; verify before flipping it.
    if self.keyword_cache_time and self.keyword_cache_time < self.updated:
        return self.keyword_cache.split(';')

    text = (clip.caption_text for clip in self.clip_set.all())
    kw_scores, ngrams = keywords_and_ngrams(text)
    # Build a real list up front: the original map()/filter() pipeline
    # breaks on Python 3, where map() yields an iterator lacking .extend().
    result = [word for word, score in kw_scores if score > LL_THRESHOLD]
    result.extend(u' '.join(words) for words, _n in ngrams)

    self.keyword_cache = u';'.join(result)
    self.keyword_cache_time = timezone.now()
    self.save()
    return result
def keywords(request, track_id):
    """Render a tag-cloud page of keywords and bigrams for one track.

    Args:
        request: the incoming Django HttpRequest.
        track_id: primary key of the Track whose clip captions are analysed.

    Returns:
        An HttpResponse rendering the 'spindle/keywords.html' template with
        pre-built HTML tag blocks for keywords and bigrams.
    """
    track = get_object_or_404(Track, pk=track_id)
    item = track.item
    text = (clip.caption_text for clip in track.clip_set.all())
    kw, ngrams = keywords_and_ngrams(text)

    def keyword_html(sorted_x):
        """HTML anchors for the top 100 keywords, sized by log-likelihood."""
        tags = []
        for word, ll in sorted_x[0:100]:
            # Word size determined by log likelihood. Only using 5 classes,
            # but we could follow the Wikipedia tag-cloud equation (using
            # tmax and tmin to determine the classes); ignored for now
            # until we know what we want as output.
            size = min(int(ll / 20), 4)
            tags.append('<a href="#" class="tag c{}">{}</a> '.format(size, word))
        # join() a list instead of repeated += (quadratic concatenation).
        return "".join(tags)

    def ngram_html(ngrams):
        """HTML anchors for bigrams, input order reversed for display."""
        tags = []
        # scale = max(n for _, n in ngrams) if ngrams else 0
        for (w1, w2), n in reversed(ngrams):
            size = 4  # 4 * int(n / scale)
            tags.append('<a href="#" class="tag c{}">{} {}</a> '.format(
                size, w1, w2))
        return "".join(tags)

    return render(
        request, 'spindle/keywords.html', {
            'title': u"Keywords: '{}'".format(item.name),
            'item': item,
            'track': track,
            'keywordblock': keyword_html(kw),
            'ngramblock': ngram_html(ngrams),
            'oxitems_keywords': item.keywords,
        })
def keywords(request, track_id):
    """Display the keyword/bigram tag cloud for a single track.

    Args:
        request: the incoming Django HttpRequest.
        track_id: primary key of the Track to analyse.

    Returns:
        An HttpResponse rendering 'spindle/keywords.html'.
    """
    track = get_object_or_404(Track, pk=track_id)
    item = track.item
    text = (clip.caption_text for clip in track.clip_set.all())
    kw, ngrams = keywords_and_ngrams(text)

    def keyword_anchor(word, ll):
        """One keyword anchor; size class clamped to c0..c4 by log-likelihood.

        Only 5 classes are used for now; we could follow the Wikipedia
        tag-cloud equation (tmax/tmin-based classes) once the desired
        output is settled.
        """
        size = int(ll / 20)
        if size > 4:
            size = 4
        return '<a href="#" class="tag c{}">{}</a> '.format(size, word)

    def keyword_html(sorted_x):
        """Concatenated anchors for the top 100 keywords.

        Uses join() over a generator rather than repeated += string
        concatenation, which is quadratic.
        """
        return "".join(keyword_anchor(word, ll) for word, ll in sorted_x[0:100])

    def ngram_html(ngrams):
        """Concatenated anchors for bigrams, shown in reversed input order."""
        # scale = max(n for _, n in ngrams) if ngrams else 0
        size = 4  # 4 * int(n / scale)
        return "".join(
            '<a href="#" class="tag c{}">{} {}</a> '.format(size, w1, w2)
            for (w1, w2), n in reversed(ngrams))

    return render(request, 'spindle/keywords.html', {
        'title': u"Keywords: '{}'".format(item.name),
        'item': item,
        'track': track,
        'keywordblock': keyword_html(kw),
        'ngramblock': ngram_html(ngrams),
        'oxitems_keywords': item.keywords,
    })