def _find_dictionary_words(words):
    """Return a dict mapping each distinct token of *words* to its Word row.

    Tokens for which the lookup finds no row, or more than one row, are
    left out of the result.
    """
    found = {}
    for token in set(words):
        try:
            found[token] = Word.objects.get(name__exact=token)
        except (Word.DoesNotExist, Word.MultipleObjectsReturned):
            # Best effort: a token without a unique Word row keeps score 0.
            continue
    return found


def _score_words_for_topic(topic, words, dictionary_words):
    """Return ``(scores, not_in_topic)`` for *words* against *topic*.

    ``scores`` maps every token to ``normTopicValue(value, True)`` of its
    Topicword row, or to 0 when the token has no usable row.
    ``not_in_topic`` counts the occurrences of tokens that are present in
    *dictionary_words* but have no unique Topicword row for *topic*.
    """
    scores = {}
    not_in_topic = 0
    for token in words:
        scores[token] = 0
        word = dictionary_words.get(token)
        if word is None:
            # Token is not in the dictionary at all.
            continue
        try:
            topicw = Topicword.objects.get(topic=topic, word=word)
        except (Topicword.DoesNotExist, Topicword.MultipleObjectsReturned):
            not_in_topic += 1
            continue
        scores[token] = normTopicValue(topicw.value, True)
    return scores, not_in_topic


def show_post(request, model_id, document_id):
    """Show a text analysis of a document using an LdaModel.

    Args:
        request: the incoming request (not read by this view).
        model_id: primary key of the LdaModel to use.
        document_id: primary key of the Document to analyse.

    Returns:
        The response produced by rendering ``application/post.tpl`` with
        the per-topic word scores, the document, the labels of the relevant
        topics and the model.

    Raises:
        Http404: via ``get_object_or_404`` when the model or the document
            does not exist.
    """
    ldamodel = get_object_or_404(LdaModel, pk=model_id)
    document = get_object_or_404(Document, pk=document_id)
    if not document.cleaned_content:
        document.clean_content()

    # Get the prediction from the model, using the stemmed text when the
    # model was built with stemming and the document has a stemmed version.
    # (The original read ``d.steamed_content``; ``d`` was never defined.)
    if ldamodel.stemming and document.steamed_content:
        text = document.steamed_content
    else:
        text = document.cleaned_content
    prediction = predict(ldamodel, text)

    # Keep only the relevant topics and expose them as a list of labels.
    # NOTE(review): the original comment said "greater than 10%" while the
    # threshold passed to cut() is 18 -- confirm which one is intended.
    relevant_topics = [
        topic.label
        for (topic, _value, _quality, _keywords) in cut(prediction, 18)
    ]

    # Re-shape the prediction as {topic id: value} for the template.
    prediction_scores = dict(
        (topic.id, value)
        for (topic, value, _quality, _keywords) in prediction
    )

    words = text.split(" ")
    topics = Topic.objects.filter(active__exact=1, ldamodel=ldamodel)
    topicword = {}
    print("Largo del texto %s" % len(words))

    # The Word lookup does not depend on the topic, so do it once per
    # distinct token instead of once per topic and token.
    dictionary_words = _find_dictionary_words(words)

    for topic in topics:
        stored = list(
            Documenttopic.objects.filter(document=document)
            .filter(topic=topic)[:1]
        )
        # A document without a stored value for this topic scores 0
        # instead of failing the whole page with an IndexError.
        if stored:
            document_topic_score = int(math.ceil(stored[0].value * 100))
        else:
            document_topic_score = 0

        palabras, not_in_topic = _score_words_for_topic(
            topic, words, dictionary_words
        )
        topicword[topic.id] = {
            'lda_score': document_topic_score,
            # Topics missing from the prediction score 0 instead of
            # raising KeyError.
            'prediction_score': prediction_scores.get(topic.id, 0),
            'palabras': palabras,
            'name': topic.label,
        }

        print("Total de palabras no encontradas  %s" % not_in_topic)
        print("% de palabras no encontradas en " + topic.label)
        # str.split(" ") always yields at least one item, so len(words) > 0.
        print(not_in_topic * 100 // len(words))

    return render_to_response(
        'application/post.tpl',
        {
            # A list (not a one-shot iterator) so the template may loop
            # over it more than once.
            'topicword': list(topicword.items()),
            'documento': document,
            'prediction': relevant_topics,
            'model': ldamodel,
        },
    )
def classify(self, ldamodel, content, cut, value):
    """Classify *content* with *ldamodel*, recursing into sub-models.

    Returns an empty list when *ldamodel* is falsy or *value* is below
    *cut*. Otherwise returns one dict per ``(topic, value, quality,
    keywords)`` tuple yielded by ``predict(ldamodel, content)``. Each
    dict's ``'subtopics'`` entry is the result of classifying the same
    content with ``self.getModel(topic)``, passing that topic's own value
    as the new *value*.
    """
    if not ldamodel or value < cut:
        return []

    results = []
    for topic, topic_value, quality, keywords in predict(ldamodel, content):
        entry = {
            'topic': topic.id,
            'topic_label': topic.label,
            'value': topic_value,
            'quality': quality,
            'keywords': keywords,
        }
        # Recurse with this topic's value, so the cut-off applies per level.
        entry['subtopics'] = self.classify(
            self.getModel(topic), content, cut, topic_value
        )
        results.append(entry)
    return results