def top_words_tab(topic, topic_url, images_url):
    """Assemble the "Top Words" tab for ``topic``.

    The tab is built from the topic's 100 highest-count non-n-gram words and
    holds, in order: a word chart, a word cloud, the turbo topics cloud (when
    one is available), the n-gram cloud (when one is available) and the
    words-in-context widget.

    Args:
        topic: topic object exposing ``topicword_set`` and ``total_count``.
        topic_url: base URL of the topic; word links are built as
            ``<topic_url>/words/<word type>``.
        images_url: passed through to ``words_in_context_widget``.

    Returns:
        The populated ``Tab``.
    """
    tab = Tab("Top Words", path='topics/top_words')
    word_url = '%s/words/' % topic_url

    def summarize(entry):
        # Weight each word by its share of the topic's total count.
        share = float(entry.count) / topic.total_count
        word_type = entry.word.type
        summary = WordSummary(word_type, share)
        summary.url = word_url + word_type
        return summary

    unigrams = topic.topicword_set.filter(word__ngram=False)
    words = [summarize(entry) for entry in unigrams.order_by('-count')[:100]]

    tab.add(word_chart_widget(words))
    tab.add(word_cloud_widget(words, title='Word Cloud'))

    # The next two widgets are optional: their builders return a falsy value
    # when there is nothing to show.
    turbo_cloud = turbo_topics_cloud_widget(topic)
    if turbo_cloud:
        tab.add(turbo_cloud)

    ngram_cloud = ngram_cloud_widget(topic, word_url)
    if ngram_cloud:
        tab.add(ngram_cloud)

    tab.add(words_in_context_widget(images_url, words))
    return tab
def ngram_cloud_widget(topic, word_url):
    """Build the "N-grams" word cloud widget for ``topic``.

    Uses the topic's 10 highest-count n-gram words, each weighted by its
    share of ``topic.total_count`` and linked to ``word_url`` + word type.

    Args:
        topic: topic object exposing ``topicword_set`` and ``total_count``.
        word_url: URL prefix to which each n-gram's type is appended.

    Returns:
        The widget produced by ``word_cloud_widget``, or ``None`` when the
        topic has no n-grams.
    """
    ngram_entries = topic.topicword_set.filter(word__ngram=True)
    top_entries = ngram_entries.order_by('-count')[:10]

    summaries = []
    for entry in top_entries:
        share = float(entry.count) / topic.total_count
        ngram_type = entry.word.type
        summary = WordSummary(ngram_type, share)
        summary.url = word_url + ngram_type
        summaries.append(summary)

    if not summaries:
        return None

    # Name must not contain spaces!
    return word_cloud_widget(summaries, title='N-grams')
def _parse_turbo_topics_counts(text, max_lines=100):
    """Parse "<token> [<token> ...] <count>" lines into (phrase, count) pairs.

    Only the first ``max_lines`` lines of ``text`` are considered; blank and
    whitespace-only lines among them are skipped.  All fields but the last
    are joined with underscores to form the phrase, and the last field is
    parsed as a float count.

    Raises:
        ValueError: if the last field of a non-blank line is not a number.
    """
    pairs = []
    for line in text.split('\n')[:max_lines]:
        fields = line.split()
        if not fields:
            # Empty or whitespace-only line.
            continue
        pairs.append(('_'.join(fields[:-1]), float(fields[-1])))
    return pairs


def turbo_topics_cloud_widget(topic):
    """Build a word cloud widget from the topic's "Turbo Topics Cloud" data.

    Looks up the ``TopicMetaInfo`` named "Turbo Topics Cloud" and its value
    for ``topic``, parses the stored text (one phrase and count per line, at
    most the first 100 lines) and weights every phrase by its share of the
    summed counts.

    Args:
        topic: the topic whose turbo topics meta info value is looked up.

    Returns:
        The widget produced by ``word_cloud_widget(summaries, url=False)``,
        or ``None`` when the meta info (or its value for this topic) does not
        exist, or when the text yields nothing to weigh (no phrases, or
        counts summing to zero).

    Raises:
        ValueError: if a non-blank line does not end in a numeric count.
    """
    # Only the lookups can legitimately miss; keep parsing errors visible
    # instead of wrapping them in the same try block.
    try:
        meta_info = TopicMetaInfo.objects.get(name="Turbo Topics Cloud")
        text = meta_info.topicmetainfovalue_set.get(topic=topic).value()
    except (TopicMetaInfo.DoesNotExist, TopicMetaInfoValue.DoesNotExist):
        return None

    phrase_counts = _parse_turbo_topics_counts(text)
    total = sum(count for _, count in phrase_counts)
    if not total:
        # Nothing to display, and dividing by zero below would crash.
        return None

    summaries = [WordSummary(phrase, count / total)
                 for phrase, count in phrase_counts]

    # TODO(matt): the "Turbo Topics N-Grams" meta info is not rendered yet;
    # make it into a cloud called "Turbo Topics N-grams" if we want to keep it.

    # Name must not contain spaces!
    return word_cloud_widget(summaries, url=False)