Example no. 1
0
    def keywords(self):
        """Return the keywords and significant bigram phrases for this item.

        Keywords are extracted from the caption text of all related clips via
        ``keywords_and_ngrams``, filtered by log-likelihood threshold, and
        cached on the model as a ``;``-separated string.

        Returns:
            list[str]: keyword strings followed by two-word ngram phrases.
        """
        # NOTE(review): this returns the cache when it is *older* than
        # self.updated, which looks inverted -- confirm the intended
        # freshness semantics before changing it.
        if self.keyword_cache_time and self.keyword_cache_time < self.updated:
            return self.keyword_cache.split(";")

        text = (clip.caption_text for clip in self.clip_set.all())
        raw_keywords, ngrams = keywords_and_ngrams(text)
        # List comprehensions instead of map()/filter(): on Python 3 those
        # return iterators, which have no .extend() and would break below.
        keywords = [word for word, ll in raw_keywords if ll > LL_THRESHOLD]
        keywords.extend(u" ".join(pair) for pair, _count in ngrams)

        self.keyword_cache = u";".join(keywords)
        self.keyword_cache_time = timezone.now()
        self.save()
        return keywords
Example no. 2
0
    def keywords(self):
        """Compute (or fetch from cache) this item's keyword list.

        The cached value lives in ``self.keyword_cache`` as a ``;``-joined
        string; on a cache miss the captions of every related clip are fed
        to ``keywords_and_ngrams`` and the cache is refreshed.

        Returns:
            list[str]: keywords above ``LL_THRESHOLD`` plus ngram phrases.
        """
        # NOTE(review): cache is served when it predates self.updated --
        # verify this comparison direction is what was intended.
        if self.keyword_cache_time and self.keyword_cache_time < self.updated:
            return self.keyword_cache.split(';')

        captions = (clip.caption_text for clip in self.clip_set.all())
        scored_words, ngrams = keywords_and_ngrams(captions)
        # Build real lists rather than map()/filter() iterators: Python 3
        # iterators lack .extend(), so the original code would crash there.
        result = [word for word, score in scored_words if score > LL_THRESHOLD]
        for pair, _count in ngrams:
            result.append(u' '.join(pair))

        self.keyword_cache = u';'.join(result)
        self.keyword_cache_time = timezone.now()
        self.save()
        return result
Example no. 3
0
def keywords(request, track_id):
    """Render the keyword tag-cloud page for a track.

    Looks up the Track (404 if absent), extracts keywords and two-word
    ngrams from its clips' caption text, and renders both as HTML tag
    fragments into the ``spindle/keywords.html`` template.
    """
    track = get_object_or_404(Track, pk=track_id)
    item = track.item
    captions = (clip.caption_text for clip in track.clip_set.all())
    kw, ngrams = keywords_and_ngrams(captions)

    def keyword_html(sorted_x):
        # Map log likelihood onto five CSS size classes (c0..c4). A proper
        # Wikipedia-style tag-cloud scaling (tmax/tmin) is deferred until
        # the desired output is settled.
        pieces = []
        for word, ll in sorted_x[:100]:
            size = min(int(ll / 20), 4)
            pieces.append('<a href="#" class="tag c{}">{}</a> '.format(size, word))
        return "".join(pieces)

    def ngram_html(ngrams):
        # Every bigram currently gets the largest size class.
        # scale = max(n for _, n in ngrams) if ngrams else 0
        pieces = []
        for (w1, w2), n in reversed(ngrams):
            size = 4  # 4 * int(n / scale)
            pieces.append('<a href="#" class="tag c{}">{} {}</a> '.format(
                size, w1, w2))
        return "".join(pieces)

    context = {
        'title': u"Keywords: '{}'".format(item.name),
        'item': item,
        'track': track,
        'keywordblock': keyword_html(kw),
        'ngramblock': ngram_html(ngrams),
        'oxitems_keywords': item.keywords,
    }
    return render(request, 'spindle/keywords.html', context)
Example no. 4
0
def keywords(request, track_id):
    """Display the keyword tag cloud for the given track (404 if missing)."""
    track = get_object_or_404(Track, pk=track_id)
    item = track.item
    text = (clip.caption_text for clip in track.clip_set.all())
    kw, ngrams = keywords_and_ngrams(text)

    def size_class(ll):
        # Five size buckets derived from log likelihood, clamped at c4;
        # a tmax/tmin tag-cloud equation could replace this later.
        bucket = int(ll / 20)
        return bucket if bucket <= 4 else 4

    def keyword_html(sorted_x):
        # Render the 100 highest-ranked keywords as tag anchors.
        return "".join(
            '<a href="#" class="tag c{}">{}</a> '.format(size_class(ll), word)
            for word, ll in sorted_x[0:100])

    def ngram_html(ngrams):
        # All bigrams share the top size class for now.
        # scale = max(n for _, n in ngrams) if ngrams else 0
        return "".join(
            '<a href="#" class="tag c{}">{} {}</a> '.format(4, w1, w2)
            for (w1, w2), n in reversed(ngrams))

    return render(request, 'spindle/keywords.html', {
        'title': u"Keywords: '{}'".format(item.name),
        'item': item,
        'track': track,
        'keywordblock': keyword_html(kw),
        'ngramblock': ngram_html(ngrams),
        'oxitems_keywords': item.keywords,
    })