예제 #1
0
def top_words_tab(topic, topic_url, images_url):
    """Assemble the "Top Words" tab for a topic.

    The tab receives, in this order: a word chart, a word cloud, the turbo
    topics cloud (only if one is returned), the n-gram cloud (only if one is
    returned), and a words-in-context widget.

    The chart, the 'Word Cloud' widget and the words-in-context widget are
    all built from the same list: the first 100 non-n-gram entries of
    ``topic.topicword_set`` ordered by descending count.  Each entry is
    weighted by ``count / topic.total_count`` and linked to
    ``<topic_url>/words/<word type>``.

    Args:
        topic: object exposing ``topicword_set`` and ``total_count``.
        topic_url: URL prefix of the topic; '/words/' is appended to it.
        images_url: passed through to ``words_in_context_widget``.

    Returns:
        The populated ``Tab``.
    """
    tab = Tab("Top Words", path='topics/top_words')
    word_url = '%s/words/' % topic_url

    unigram_entries = topic.topicword_set.filter(word__ngram=False)
    unigram_entries = unigram_entries.order_by('-count')[:100]

    words = []
    for entry in unigram_entries:
        share = float(entry.count) / topic.total_count
        summary = WordSummary(entry.word.type, share)
        summary.url = word_url + entry.word.type
        words.append(summary)

    tab.add(word_chart_widget(words))
    tab.add(word_cloud_widget(words, title='Word Cloud'))

    # The two clouds below are optional: their builders may return a falsy
    # value, in which case nothing is added to the tab.
    turbo_cloud = turbo_topics_cloud_widget(topic)
    if turbo_cloud:
        tab.add(turbo_cloud)

    ngram_cloud = ngram_cloud_widget(topic, word_url)
    if ngram_cloud:
        tab.add(ngram_cloud)

    tab.add(words_in_context_widget(images_url, words))
    return tab
예제 #2
0
def ngram_cloud_widget(topic, word_url):
    """Return an 'N-grams' word-cloud widget for a topic, or None.

    Uses the first 10 entries of ``topic.topicword_set`` whose word is
    flagged as an n-gram, ordered by descending count.  Each entry is
    weighted by ``count / topic.total_count`` and linked to
    ``word_url + <word type>``.

    Args:
        topic: object exposing ``topicword_set`` and ``total_count``.
        word_url: URL prefix to which each n-gram's type is appended.

    Returns:
        The result of ``word_cloud_widget`` for the collected n-grams, or
        ``None`` when the topic has no n-gram entries.
    """
    ngram_entries = topic.topicword_set.filter(word__ngram=True)
    ngram_entries = ngram_entries.order_by('-count')[:10]

    summaries = []
    for entry in ngram_entries:
        share = float(entry.count) / topic.total_count
        summary = WordSummary(entry.word.type, share)
        summary.url = word_url + entry.word.type
        summaries.append(summary)

    if not summaries:
        return None

    # Carried over from the original author: the name must not contain
    # spaces (presumably this refers to the widget title -- confirm).
    return word_cloud_widget(summaries, title='N-grams')
예제 #3
0
def turbo_topics_cloud_widget(topic):
    """Build a word-cloud widget from a topic's "Turbo Topics Cloud" data.

    Looks up the ``TopicMetaInfo`` named "Turbo Topics Cloud" and its value
    for ``topic``, then parses the value's text.  Only the first 100 lines
    of the text are considered and blank lines among them are skipped.  On
    each remaining line the last whitespace-separated field is the count and
    the preceding fields are joined with '_' to form the term.  Each term is
    weighted by its share of the summed counts.

    Args:
        topic: the topic whose meta-info value is looked up.

    Returns:
        The result of ``word_cloud_widget(summaries, url=False)``, or
        ``None`` when the meta info, or its value for ``topic``, does not
        exist.

    Raises:
        ValueError: if the last field of a non-blank line is not a number.
    """
    # Keep the try body down to the lookups that can raise DoesNotExist.
    try:
        turbo_topics = TopicMetaInfo.objects.get(name="Turbo Topics Cloud")
        value = turbo_topics.topicmetainfovalue_set.get(topic=topic)
        text = value.value()
    except (TopicMetaInfo.DoesNotExist,
            TopicMetaInfoValue.DoesNotExist):
        # TODO(matt): if a "Turbo Topics N-grams" cloud is wanted, build it
        # here from the TopicMetaInfo named "Turbo Topics N-Grams".  The
        # former fallback only looked that record up and discarded it.
        return None

    term_counts = []
    for line in text.split('\n')[:100]:
        fields = line.split()
        if not fields:
            # Empty or whitespace-only line.
            continue
        term_counts.append(('_'.join(fields[:-1]), float(fields[-1])))

    total = sum(count for _, count in term_counts)

    # When every count is zero there is no meaningful share; use 0.0 rather
    # than dividing by zero.
    summaries = [
        WordSummary(term, count / total if total else 0.0)
        for term, count in term_counts
    ]
    return word_cloud_widget(summaries, url=False)