def top_documents_widget(dataset, word):
    """Build the 'Top Documents' widget for ``word`` within ``dataset``.

    Every document-word row of ``word`` whose document belongs to
    ``dataset`` is turned into a ``WordSummary`` of the document's filename
    and that row's share of the summed counts.

    Args:
        dataset: Value matched against ``document__dataset`` to restrict
            the rows considered.
        word: Object exposing a ``documentword_set`` related manager.

    Returns:
        The ``Widget`` with the result of ``get_chart`` stored under
        ``'chart_url'``.

    Raises:
        ZeroDivisionError: If rows exist but their counts sum to zero.
    """
    w = Widget('Top Documents', 'words/top_documents')
    # dw.document is read for every row below; select_related pulls it in
    # with the same query instead of issuing one extra query per row.
    docwords = word.documentword_set.filter(
        document__dataset=dataset).select_related('document')
    # sum() instead of reduce(): reduce is not a builtin on Python 3.
    total = sum(dw.count for dw in docwords)
    # Ordering is whatever WordSummary's comparison defines (not shown here).
    docs = sorted(
        WordSummary(dw.document.filename, float(dw.count) / total)
        for dw in docwords)
    w['chart_url'] = get_chart(docs)
    return w
def top_documents_widget(dataset, word):
    """Build the 'Top Documents' widget for ``word`` using a raw SQL count.

    The query groups the tokens whose type equals ``word.type`` by document
    and orders the documents by descending token count. Each document is
    then summarised by its filename and its share of the total count.

    Args:
        dataset: Currently unused by the query (see the review note below).
        word: Object whose ``type`` attribute is bound to the query
            parameter.

    Returns:
        The ``Widget`` with the result of ``get_chart`` stored under
        ``'chart_url'``.
    """
    # NOTE(review): `dataset` is never referenced, so documents are not
    # restricted to one dataset here -- confirm whether that is intended.
    w = Widget('Top Documents', 'words/top_documents')
    query = (
        'select doc.*,count(*) as count '
        'from visualize_wordtype as type, visualize_wordtoken as token, '
        'visualize_document as doc '
        'where type.type=%s and token.type_id=type.id '
        'and token.document_id=doc.id '
        'group by doc.id order by count desc'
    )
    # Materialise once: the rows are walked twice below, and a raw queryset
    # that does not cache its results would re-run the SQL on each pass.
    docs = list(Document.objects.raw(query, [word.type]))
    total = float(sum(doc.count for doc in docs))
    doc_summaries = [WordSummary(doc.filename, float(doc.count) / total)
                     for doc in docs]
    w['chart_url'] = get_chart(doc_summaries)
    return w
def top_topics_widget(analysis, word):
    """Build the 'Top Topics' widget for ``word`` within ``analysis``.

    Every topic-word row of ``word`` whose topic belongs to ``analysis`` is
    turned into a ``WordSummary`` of the topic's name and that row's share
    of the summed counts.

    Args:
        analysis: Value matched against ``topic__analysis`` to restrict the
            rows considered.
        word: Object exposing a ``topicword_set`` related manager.

    Returns:
        The ``Widget`` with the result of ``get_chart`` stored under
        ``'chart_url'``.

    Raises:
        ZeroDivisionError: If rows exist but their counts sum to zero.
    """
    w = Widget('Top Topics', 'words/top_topics')
    # tw.topic is read for every row below; select_related pulls it in with
    # the same query instead of issuing one extra query per row.
    topicwords = word.topicword_set.filter(
        topic__analysis=analysis).select_related('topic')
    # sum() instead of reduce(): reduce is not a builtin on Python 3.
    total = sum(tw.count for tw in topicwords)
    # Sorted exactly once; the original sorted the same list a second time.
    topics = sorted(
        WordSummary(tw.topic.name, float(tw.count) / total)
        for tw in topicwords)
    w['chart_url'] = get_chart(topics)
    return w
def top_topics_widget(analysis, document):
    """Build the 'Top Topics' widget for ``document`` within ``analysis``.

    Each document-topic row whose topic belongs to ``analysis`` becomes a
    ``WordSummary`` of the topic's name and that row's fraction of the
    summed counts. The summaries are sorted before being charted.

    Args:
        analysis: Value matched against ``topic__analysis``.
        document: Object exposing a ``documenttopic_set`` related manager.

    Returns:
        The ``Widget`` with the result of ``get_chart`` stored under
        ``'chart_address'``.
    """
    widget = Widget('Top Topics', 'documents/top_topics')
    doc_topics = document.documenttopic_set.filter(topic__analysis=analysis)
    # First pass: the denominator shared by every fraction.
    grand_total = sum(entry.count for entry in doc_topics)
    # Second pass: one summary per row, ordered by WordSummary's comparison.
    summaries = sorted(
        WordSummary(entry.topic.name, float(entry.count) / grand_total)
        for entry in doc_topics
    )
    widget['chart_address'] = get_chart(summaries)
    return widget
def top_topics_widget(session, analysis, word):
    """Build the 'Top Topics' widget for ``word`` with scheme-aware names.

    The tokens of ``word`` that have a topic in ``analysis`` are grouped by
    topic and ordered by descending count. Each group becomes a
    ``WordSummary`` of the topic's display name (from
    ``topic_name_with_ns``) and the group's share of the token count.

    Args:
        session: Passed to ``current_name_scheme`` to pick the name scheme.
        analysis: Value matched against ``topics__analysis`` and passed to
            ``current_name_scheme``.
        word: Object exposing a ``tokens`` related manager.

    Returns:
        The ``Widget`` with the result of ``get_chart`` stored under
        ``'chart_url'``.
    """
    w = Widget('Top Topics', 'words/top_topics')
    analysis_tokens = word.tokens.filter(topics__analysis=analysis)
    total = float(analysis_tokens.count())
    # Materialised because the rows are walked twice below (ids, summaries).
    topicwords = list(
        analysis_tokens.values('topics')
        .annotate(count=Count('topics'))
        .order_by('-count'))
    ns = current_name_scheme(session, analysis)
    # One bulk lookup instead of a separate Topic.objects.get() per row.
    topics_by_id = Topic.objects.in_bulk(
        [row['topics'] for row in topicwords])
    topics = [
        WordSummary(
            topic_name_with_ns(topics_by_id[row['topics']], ns),
            row['count'] / total)
        for row in topicwords
    ]
    w['chart_url'] = get_chart(topics)
    return w
def top_words_chart_widget(words):
    """Return the 'Top Words' widget charting ``words``.

    Args:
        words: Passed unchanged to ``get_chart``.

    Returns:
        The ``Widget`` with the result of ``get_chart`` stored under
        ``'chart_address'``.
    """
    widget = Widget('Top Words', 'attributes/top_words_chart')
    chart = get_chart(words)
    widget['chart_address'] = chart
    return widget
def word_chart_widget(words):
    """Return the 'Word Chart' widget charting ``words``.

    Args:
        words: Passed unchanged to ``get_chart``.

    Returns:
        The ``Widget`` with the result of ``get_chart`` stored under
        ``'chart_address'``.
    """
    widget = Widget("Word Chart", 'topics/word_chart')
    chart = get_chart(words)
    widget['chart_address'] = chart
    return widget