Esempio n. 1
0
 def get_context_data(self, request, **kwargs):
     """Build the context for one word within an attribute value.

     Starts from the parent view's context and adds:

     * ``words`` -- one ``WordSummary`` per document in
       ``word.document_set`` that matches the current attribute and value,
       each carrying a link to the document plus its name and id;
     * ``attribute_post_link`` -- the ``/words/<type>`` suffix for the word.

     The word type is taken from ``kwargs['word']``; the breadcrumb held in
     the context is also extended with the word.
     """
     context = super(AttributeWordView, self).get_context_data(request, **kwargs)
     dataset, analysis, attribute, value = (
         context[key] for key in ('dataset', 'analysis', 'attribute', 'value'))

     word = Word.objects.get(dataset=dataset, type=kwargs['word'])
     matching_docs = word.document_set.filter(
         attribute=attribute, attributevaluedocument__value=value)

     url_template = '%s/%s/values/%s/documents/%d?kwic=%s'
     summaries = []
     for doc in matching_docs:
         summary = WordSummary(word.type)
         # Fill in the word's surrounding context before decorating the
         # summary with link and document details.
         set_word_context(summary, doc, analysis)
         summaries.append(summary)
         summary.url = url_template % (
             context['attributes_url'], attribute.name, value.value,
             doc.id, word.type)
         summary.doc_name = doc.filename
         summary.doc_id = doc.id

     context['words'] = summaries
     context['breadcrumb'].word(word)
     context['attribute_post_link'] = '/words/%s' % word.type

     return context
Esempio n. 2
0
def top_words_tab(topic, topic_url, images_url):
    """Assemble the "Top Words" tab for a topic.

    Takes up to 100 non-ngram entries of ``topic.topicword_set`` ordered by
    descending count, wraps each in a ``WordSummary`` whose second argument
    is the entry's count divided by ``topic.total_count``, and feeds them to
    the chart, cloud and words-in-context widgets.  The turbo-topics and
    n-gram cloud widgets are added only when their builders return a truthy
    value.

    Returns the populated ``Tab``.
    """
    tab = Tab("Top Words", path='topics/top_words')

    word_url = '%s/words/' % topic_url
    unigram_entries = topic.topicword_set.filter(word__ngram=False)
    ranked_entries = unigram_entries.order_by('-count')

    words = []
    for entry in ranked_entries[:100]:
        share = float(entry.count) / topic.total_count
        summary = WordSummary(entry.word.type, share)
        summary.url = word_url + entry.word.type
        words.append(summary)

    tab.add(word_chart_widget(words))
    tab.add(word_cloud_widget(words, title='Word Cloud'))

    # Optional widgets: their builders may return a falsy value, in which
    # case nothing is added.
    turbo_cloud = turbo_topics_cloud_widget(topic)
    if turbo_cloud:
        tab.add(turbo_cloud)

    ngram_cloud = ngram_cloud_widget(topic, word_url)
    if ngram_cloud:
        tab.add(ngram_cloud)

    tab.add(words_in_context_widget(images_url, words))

    return tab
Esempio n. 3
0
 def get_context_data(self, request, **kwargs):
     """Build the context for one word within a topic.

     Looks up the word named by ``kwargs['word']`` in the dataset named by
     ``kwargs['dataset']``, passes a ``TopicFilterByWord`` preset to that
     word to the parent view as an extra filter, and then adds:

     * ``word`` -- the ``Word`` object;
     * ``documents`` -- one ``WordSummary`` per ``documenttopicword`` row of
       the word in the current topic, ordered by document filename;
     * ``topic_post_link`` -- the ``/words/<type>`` suffix for the word;
     * ``tabs`` -- a single tab built by ``self._topic_word_tab``.
     """
     dataset_name, analysis_name = kwargs['dataset'], kwargs['analysis']
     word = Word.objects.get(dataset__name=dataset_name, type=kwargs['word'])

     # The filter has to exist before the parent builds its context, since
     # it is handed over through ``extra_filters``.
     filter_analysis = Analysis.objects.get(
         dataset__name=dataset_name, name=analysis_name)
     word_filter = TopicFilterByWord(filter_analysis, 0)
     word_filter.current_word = word

     context = super(TopicWordView, self).get_context_data(
         request, extra_filters=[word_filter], **kwargs)
     analysis = context['analysis']
     topic = context['topic']
     context['word'] = word

     rows = word.documenttopicword_set.filter(topic=topic)
     rows = rows.order_by('document__filename')
     url_template = "%s/%d/documents/%d?kwic=%s"
     summaries = []
     for row in rows:
         doc = row.document
         summary = WordSummary(word.type)
         set_word_context(summary, doc, analysis, topic.number)
         summaries.append(summary)
         summary.url = url_template % (
             context['topics_url'], topic.number, doc.id, word.type)
         summary.doc_name = doc.filename
         summary.doc_id = doc.id
     context['documents'] = summaries
     context['breadcrumb'].word(word)
     context['topic_post_link'] = '/words/%s' % word.type

     word_url = '%s/%d/words/' % (context['topics_url'], topic.number)
     word_tab = self._topic_word_tab(analysis, word, word_url, context['IMAGES'])
     context['tabs'] = [word_tab]

     return context
Esempio n. 4
0
def word_in_context_widget(word, word_url, images_url):
    """Build the 'Word In Context' widget for a single word.

    The widget receives five ``WordSummary`` objects for ``word.type``,
    numbered 0 through 4 and all pointing at ``word_url``, under the
    ``'words'`` key, and ``images_url`` under the ``'IMAGES'`` key.
    """
    widget = Widget('Word In Context', 'words/word_in_context')

    def numbered_summary(number):
        # One summary slot; every slot links to the same URL.
        summary = WordSummary(word.type, number=number)
        summary.url = word_url
        return summary

    summaries = [numbered_summary(number) for number in range(5)]
    widget['IMAGES'] = images_url
    widget['words'] = summaries
    return widget
Esempio n. 5
0
def get_words(attribute, value, attributes_url, token_count):
    """Summarise up to 100 words for an attribute value.

    Takes the first 100 entries returned by ``get_attrvalwords`` and wraps
    each in a ``WordSummary`` built from the word type and the entry's count
    divided by ``token_count``.  Each summary's ``url`` is
    ``<attributes_url>/<attribute.name>/values/<value.value>/words/<type>``.

    Returns the list of summaries.
    """
    def summarize(entry):
        word_type = entry.word.type
        share = float(entry.count) / token_count
        summary = WordSummary(word_type, share)
        summary.url = '/'.join((
            attributes_url, attribute.name, 'values',
            value.value, 'words', word_type))
        return summary

    entries = get_attrvalwords(attribute, value)
    return [summarize(entry) for entry in entries[:100]]
Esempio n. 6
0
def get_topics(analysis, attribute, value, analysis_url, token_count):
    """Summarise up to 10 topics for an attribute value.

    Selects the ``attributevaluetopic`` rows of ``attribute`` for ``value``
    whose topic belongs to ``analysis``, ordered by descending count, and
    keeps the first 10.  Each is wrapped in a ``WordSummary`` built from the
    topic name and the row's count divided by ``token_count``, with ``url``
    set to ``<analysis_url>/topics/<topic number>``.

    Returns the list of summaries.
    """
    analysis_topics = analysis.topic_set.all()
    ranked_rows = attribute.attributevaluetopic_set.filter(
        value=value, topic__in=analysis_topics).order_by('-count')

    summaries = []
    for row in ranked_rows[:10]:
        topic_name = row.topic.name
        share = float(row.count) / token_count
        summary = WordSummary(topic_name, share)
        topic_path = '/topics/%s' % row.topic.number
        summary.url = analysis_url + topic_path
        summaries.append(summary)
    return summaries
Esempio n. 7
0
def get_ngrams(attribute, value, attributes_url, tokens):
    """Summarise up to 10 n-grams for an attribute value.

    Selects the ``attributevalueword`` rows of ``attribute`` for ``value``
    whose word is flagged as an n-gram, ordered by descending count, and
    keeps the first 10.  Each is wrapped in a ``WordSummary`` built from the
    word type and the row's count divided by ``tokens``, with ``url`` set to
    ``<attributes_url>/<attribute.name>/values/<value.value>/words/<type>``.

    Returns the list of summaries.
    """
    ngram_rows = (attribute.attributevalueword_set
                  .filter(value=value)
                  .order_by('-count')
                  .filter(word__ngram=True))

    def summarize(row):
        ngram_type = row.word.type
        share = float(row.count) / tokens
        summary = WordSummary(ngram_type, share)
        summary.url = '/'.join((
            attributes_url, attribute.name, 'values',
            value.value, 'words', ngram_type))
        return summary

    return [summarize(row) for row in ngram_rows[:10]]
Esempio n. 8
0
def ngram_cloud_widget(topic, word_url):
    """Build an 'N-grams' word-cloud widget for a topic, if it has n-grams.

    Takes up to 10 n-gram entries of ``topic.topicword_set`` ordered by
    descending count and wraps each in a ``WordSummary`` built from the word
    type and the entry's count divided by ``topic.total_count``; each
    summary's ``url`` is ``word_url`` followed by the word type.

    Returns the widget, or ``None`` when the topic has no n-gram entries.
    """
    ranked_entries = topic.topicword_set.filter(
        word__ngram=True).order_by('-count')

    summaries = []
    for entry in ranked_entries[:10]:
        share = float(entry.count) / topic.total_count
        summary = WordSummary(entry.word.type, share)
        summary.url = word_url + entry.word.type
        summaries.append(summary)

    if not summaries:
        return None
    # NOTE (from the original author): the widget name must not contain
    # spaces, hence the hyphenated title.
    return word_cloud_widget(summaries, title='N-grams')
Esempio n. 9
0
def word_in_context(request, dataset, analysis, word, topic=None):
    """Return a JSON keyword-in-context sample for a word.

    One row is picked at random from the word's ``documentword_set`` (or,
    when ``topic`` is given, from that topic's ``documenttopicword_set``
    restricted to the word).  The row's document supplies the left context,
    the word and the right context via ``get_context_for_word``.

    Args:
        request: the incoming request (not used by this view).
        dataset: name of the dataset the word belongs to.
        analysis: name of the analysis within that dataset.
        word: the word type to look up.
        topic: optional topic number; when given, only rows for that topic
            in the analysis are considered.

    Returns:
        An ``HttpResponse`` whose body is the JSON dump of the summary's
        attributes, including the context fields, ``doc_name`` and ``doc_id``.

    Raises:
        Http404: if there is no row to sample from.  Previously this case
            reached ``random.randint(0, -1)`` and failed with ``ValueError``.
    """
    # Function-scope import so the new name is in scope wherever this block
    # is applied.
    from django.http import Http404

    analysis = Analysis.objects.get(name=analysis, dataset__name=dataset)
    w = Word.objects.get(dataset__name=dataset, type=word)
    word_context = WordSummary(word)

    if topic is None:
        docset = w.documentword_set.all()
    else:
        topic = Topic.objects.get(analysis=analysis, number=topic)
        docset = topic.documenttopicword_set.filter(word=w)

    num_docs = len(docset)
    if not num_docs:
        # Nothing to sample; report "not found" instead of crashing.
        raise Http404('No document context available for word "%s"' % word)
    d = docset[random.randint(0, num_docs - 1)]

    topic_number = topic.number if topic else None
    (word_context.left_context,
     word_context.word,
     word_context.right_context) = d.document.get_context_for_word(
        word, analysis, topic_number)

    word_context.doc_name = d.document.filename
    word_context.doc_id = d.document.id
    return HttpResponse(anyjson.dumps(vars(word_context)))