Esempio n. 1
0
def show_post(request,model_id,document_id):
    """Show the text analysis of a document using an LdaModel.

    Loads the LdaModel ``model_id`` and the Document ``document_id`` through
    ``get_object_or_404``, runs ``predict`` on the document text and, for every
    active topic of the model, collects the score of each word of the text.

    Returns the result of ``render_to_response`` for the template
    ``application/post.tpl`` with the context keys ``topicword``,
    ``documento``, ``prediction`` and ``model``.

    Raises:
        IndexError: if an active topic has no Documenttopic row for the
            document.
        KeyError: if an active topic is missing from the prediction.
    """
    ldamodel = get_object_or_404(LdaModel, pk=model_id)
    document = get_object_or_404(Document, pk=document_id)

    if not document.cleaned_content:
        document.clean_content()

    # Use the stemmed text only when the model works with stemming and the
    # document actually has a stemmed version.
    if ldamodel and ldamodel.stemming and document.steamed_content:
        text = document.steamed_content
    else:
        text = document.cleaned_content

    # Prediction based on the model: (topic, value, quality, keywords) tuples.
    prediction = predict(ldamodel, text)
    # Keep only the relevant topics, as a list of labels for the output.
    # NOTE(review): the original comment said "greater than 10%" but the
    # threshold handed to cut() is 18 -- confirm which one is intended.
    relevant_topics = [
        t.label for (t, _score, _quality, _keywords) in cut(prediction, 18)
    ]

    # Re-shape the prediction as {topic id: value}, suitable for the template.
    prediction = dict(
        (t.id, score) for (t, score, _quality, _keywords) in prediction
    )

    words = text.split(" ")
    topics = Topic.objects.filter(active__exact=1, ldamodel=ldamodel)
    topicword = {}
    # Dictionary lookups do not depend on the topic: share them across topics.
    word_cache = {}

    print("Largo del texto %s" % len(words))

    for topic in topics:
        not_in_topic = 0

        document_topic = Documenttopic.objects.filter(document=document).filter(topic=topic)[0]
        document_topic_score = int(math.ceil(document_topic.value * 100))

        word_scores = {}
        topicword[topic.id] = {
            'lda_score': document_topic_score,
            'prediction_score': prediction[topic.id],
            'palabras': word_scores,
            'name': topic.label,
        }

        for w in words:
            # Every word of the text gets an entry; 0 means "no score found".
            word_scores[w] = 0
            word = _lookup_word(w, word_cache)
            if word is None:
                continue
            try:
                topicw = Topicword.objects.get(topic=topic, word=word)
            except (Topicword.DoesNotExist, Topicword.MultipleObjectsReturned):
                # Counted per occurrence, so repeated words count repeatedly.
                not_in_topic += 1
                continue
            word_scores[w] = normTopicValue(topicw.value, True)

        print("Total de palabras no encontradas  %s" % not_in_topic)
        print("% de palabras no encontradas en " + topic.label)
        # words is never empty: str.split(" ") always yields at least one item.
        print(not_in_topic * 100 // len(words))

    # items() rather than iteritems(): the template gets a list it can
    # iterate more than once.
    return render_to_response('application/post.tpl',
        {'topicword': topicword.items(), 'documento': document, 'prediction': relevant_topics, 'model': ldamodel})


def _lookup_word(name, cache):
    """Return the Word called ``name`` (or None), memoizing the answer in ``cache``."""
    if name not in cache:
        try:
            cache[name] = Word.objects.get(name__exact=name)
        except (Word.DoesNotExist, Word.MultipleObjectsReturned):
            # Remember misses too, so the query is not repeated per topic.
            cache[name] = None
    return cache[name]
Esempio n. 2
0
 def classify(self,ldamodel,content,cut,value):
     """Classify ``content`` with ``ldamodel`` and, recursively, its sub-models.

     Returns an empty list when ``ldamodel`` is falsy or ``value`` is below
     ``cut``. Otherwise returns one dict per (topic, value, quality, keywords)
     tuple produced by ``predict(ldamodel, content)``, with the keys 'topic',
     'topic_label', 'value', 'quality', 'keywords' and 'subtopics'.

     'subtopics' is the classification of the same content with the model
     given by ``self.getModel(topic)``; the recursion receives that topic's
     own predicted value, so it stops once a topic scores below ``cut``.
     """
     if not ldamodel or value < cut:
         return []

     classified = []
     for (topic, score, quality, keywords) in predict(ldamodel, content):
         classified.append({
             'topic': topic.id,
             'topic_label': topic.label,
             'value': score,
             'quality': quality,
             'keywords': keywords,
             # The topic's own score (not the caller's value) drives the recursion.
             'subtopics': self.classify(self.getModel(topic), content, cut, score),
         })
     return classified