def get_context_data(self, request, **kwargs):
    """Build the template context for a word viewed under an attribute value.

    Collects every document carrying the attribute/value pair that contains
    the word, and attaches a WordSummary (with KWIC context and a browse URL)
    for each occurrence.
    """
    context = super(AttributeWordView, self).get_context_data(request, **kwargs)
    dataset = context['dataset']
    analysis = context['analysis']
    attribute = context['attribute']
    value = context['value']

    word = Word.objects.get(dataset=dataset, type=kwargs['word'])
    matching_docs = word.document_set.filter(
            attribute=attribute, attributevaluedocument__value=value)

    summaries = []
    for doc in matching_docs:
        summary = WordSummary(word.type)
        set_word_context(summary, doc, analysis)
        # Link back to the document page with the word highlighted (kwic).
        summary.url = '%s/%s/values/%s/documents/%d?kwic=%s' \
                % (context['attributes_url'], attribute.name, value.value,
                   doc.id, word.type)
        summary.doc_name = doc.filename
        summary.doc_id = doc.id
        summaries.append(summary)

    context['words'] = summaries
    context['breadcrumb'].word(word)
    context['attribute_post_link'] = '/words/%s' % word.type
    return context
def top_words_tab(topic, topic_url, images_url):
    """Assemble the "Top Words" tab for a topic.

    The tab contains a bar chart and word cloud of the top 100 unigrams,
    optional turbo-topics and n-gram clouds, and a words-in-context widget.
    """
    tab = Tab("Top Words", path='topics/top_words')
    word_url = '%s/words/' % topic_url

    unigram_counts = topic.topicword_set.filter(
            word__ngram=False).order_by('-count')
    summaries = []
    for tw in unigram_counts[:100]:
        summary = WordSummary(tw.word.type, float(tw.count) / topic.total_count)
        summary.url = word_url + tw.word.type
        summaries.append(summary)

    tab.add(word_chart_widget(summaries))
    tab.add(word_cloud_widget(summaries, title='Word Cloud'))

    # These two widgets are optional: the helpers return None when the
    # topic has no corresponding data.
    turbo_cloud = turbo_topics_cloud_widget(topic)
    if turbo_cloud:
        tab.add(turbo_cloud)
    ngram_cloud = ngram_cloud_widget(topic, word_url)
    if ngram_cloud:
        tab.add(ngram_cloud)

    tab.add(words_in_context_widget(images_url, summaries))
    return tab
def get_context_data(self, request, **kwargs):
    """Build the template context for a word viewed within one topic.

    Installs a TopicFilterByWord pinned to the requested word, then lists
    every document/topic/word occurrence for the current topic, each with
    KWIC context and a browse URL.
    """
    dataset_name = kwargs['dataset']
    analysis_name = kwargs['analysis']
    word = Word.objects.get(dataset__name=dataset_name, type=kwargs['word'])

    # Pre-seed the topic filter so the topic list is restricted to this word.
    word_filter = TopicFilterByWord(
            Analysis.objects.get(dataset__name=dataset_name,
                                 name=analysis_name), 0)
    word_filter.current_word = word
    context = super(TopicWordView, self).get_context_data(
            request, extra_filters=[word_filter], **kwargs)

    analysis = context['analysis']
    topic = context['topic']
    context['word'] = word

    occurrences = word.documenttopicword_set.filter(
            topic=topic).order_by('document__filename')
    docs = []
    for occurrence in occurrences:
        document = occurrence.document
        summary = WordSummary(word.type)
        set_word_context(summary, document, analysis, topic.number)
        # Link to the document page with the word highlighted (kwic).
        summary.url = "%s/%d/documents/%d?kwic=%s" % (
                context['topics_url'], topic.number, document.id, word.type)
        summary.doc_name = document.filename
        summary.doc_id = document.id
        docs.append(summary)

    context['documents'] = docs
    context['breadcrumb'].word(word)
    context['topic_post_link'] = '/words/%s' % word.type

    word_url = '%s/%d/words/' % (context['topics_url'], topic.number)
    context['tabs'] = [self._topic_word_tab(analysis, word, word_url,
                                            context['IMAGES'])]
    return context
def word_in_context_widget(word, word_url, images_url):
    """Build a 'Word In Context' widget holding five numbered WordSummary
    placeholders for *word*, each pointing at *word_url*."""
    widget = Widget('Word In Context', 'words/word_in_context')

    summaries = []
    for idx in range(5):
        summary = WordSummary(word.type, number=idx)
        summary.url = word_url
        summaries.append(summary)

    widget['IMAGES'] = images_url
    widget['words'] = summaries
    return widget
def get_words(attribute, value, attributes_url, token_count):
    """Summarize the top 100 words for an attribute value.

    Each WordSummary carries the word's share of *token_count* and a URL
    into the attribute browser.
    """
    summaries = []
    for avw in get_attrvalwords(attribute, value)[:100]:
        word_type = avw.word.type
        summary = WordSummary(word_type, float(avw.count) / token_count)
        summary.url = '%s/%s/values/%s/words/%s' % (
                attributes_url, attribute.name, value.value, word_type)
        summaries.append(summary)
    return summaries
def get_topics(analysis, attribute, value, analysis_url, token_count):
    """Summarize the ten most frequent topics for an attribute value.

    Reuses WordSummary as a generic (name, percent, url) container; the
    percent is the topic's share of *token_count*.
    """
    topic_set = analysis.topic_set.all()
    counts = attribute.attributevaluetopic_set.filter(
            value=value, topic__in=topic_set).order_by('-count')

    summaries = []
    for avt in counts[:10]:
        summary = WordSummary(avt.topic.name, float(avt.count) / token_count)
        summary.url = '%s/topics/%s' % (analysis_url, avt.topic.number)
        summaries.append(summary)
    return summaries
def get_ngrams(attribute, value, attributes_url, tokens):
    """Summarize the ten most frequent n-grams for an attribute value.

    Each WordSummary carries the n-gram's share of *tokens* and a URL into
    the attribute browser.
    """
    counts = attribute.attributevalueword_set.filter(
            value=value).order_by('-count').filter(word__ngram=True)

    summaries = []
    for avn in counts[:10]:
        ngram_type = avn.word.type
        summary = WordSummary(ngram_type, float(avn.count) / tokens)
        summary.url = '%s/%s/values/%s/words/%s' % (
                attributes_url, attribute.name, value.value, ngram_type)
        summaries.append(summary)
    return summaries
def ngram_cloud_widget(topic, word_url):
    """Build a word-cloud widget of the topic's top ten n-grams.

    Returns None when the topic has no n-grams, so callers can skip the
    widget entirely.
    """
    counts = topic.topicword_set.filter(
            word__ngram=True).order_by('-count')

    summaries = []
    for tn in counts[:10]:
        summary = WordSummary(tn.word.type, float(tn.count) / topic.total_count)
        summary.url = word_url + tn.word.type
        summaries.append(summary)

    if not summaries:
        return None
    # Name must not contain spaces!
    return word_cloud_widget(summaries, title='N-grams')
def word_in_context(request, dataset, analysis, word, topic=None):
    """AJAX view: return JSON for one random occurrence of *word*.

    When *topic* is given, the occurrence is drawn only from that topic's
    document/word assignments; otherwise from all documents in the dataset.
    The response body is the WordSummary's attribute dict serialized with
    anyjson.
    """
    analysis = Analysis.objects.get(name=analysis, dataset__name=dataset)
    w = Word.objects.get(dataset__name=dataset, type=word)

    if topic is None:
        docset = w.documentword_set.all()
    else:
        # Rebind the topic name/number to the actual Topic object.
        topic = Topic.objects.get(analysis=analysis, number=topic)
        docset = topic.documenttopicword_set.filter(word=w)

    # Pick one occurrence at random. NOTE(review): randint raises ValueError
    # when docset is empty — presumably callers only request words that occur.
    pick = docset[random.randint(0, len(docset) - 1)]

    summary = WordSummary(word)
    summary.left_context, summary.word, summary.right_context = \
            pick.document.get_context_for_word(
                    word, analysis, topic.number if topic else None)
    summary.doc_name = pick.document.filename
    summary.doc_id = pick.document.id
    return HttpResponse(anyjson.dumps(vars(summary)))