def handle(self, *args, **options): contador = 0 total = 50 materias = Materia.objects.filter(corpo__icontains="saibamais")[:total] # materias = [Materia.objects.get(id=863)] i = 0 for m in materias: print "\n", m.titulo documento = get_document(m) # html = lhtml.fromstring(m.corpo.decode('utf-8')) # documento['relacionadas'] = set([ change_host(h.attrib['href']) for h in html.cssselect('.saibamais ul li a')]) recomendadas = [str(recomendada.permalink) for recomendada in relacionadas(documento, 4)] for r in recomendadas: urls = [u.origem for u in Analytics.objects.filter(destino=r).order_by("pageviews")] documents = [(tf(m.corpo).keys(), "dentro") for m in Materia.objects.filter(permalink__in=urls)] prob = 0 if documents: documents += [ (tf(m.corpo).keys(), "fora") for m in Materia.objects.exclude(permalink__in=urls)[: len(documents)] ] prob = classifica(m, documents) # print i, round(prob, 2) if prob > 0.7: print round(prob, 2), r i += 1
def handle(self, *args, **options): import time inicio = time.time() settings.CACHE = True # if options['editoria']: # folder = Folder.objects.get(name=options['editoria']) # materias = Materia.objects.filter(status='T', folders=folder) # else: # materias = Materia.objects.filter(status='T') # materias = materias[:options['total']] materias = Materia.objects.filter(corpo__icontains='saibamais')[:100] # editorias_id = [39,31,119,214,339,216,146, # 8,133,101,94,20,42,76,105] # for f in editorias_id: # folder = Folder.objects.get(id=f) # materias = Materia.objects.filter(status='T', folders=folder)[:total] contador = 0 combinacoes = [] dict_combinacoes = {} seq = options['sequential'] for tam_comb in range(len(seq)): for comb in itertools.combinations(seq,tam_comb+1): combinacoes.append("".join(comb)) for m in materias: contador+=1 # print contador, time.time() -inicio mfolder = m.primary_folder().name if not dict_combinacoes.has_key(mfolder): dict_combinacoes[mfolder] = {} if dict_combinacoes[mfolder].has_key('_TotalMaterias'): dict_combinacoes[mfolder]['_TotalMaterias'] +=1 else: dict_combinacoes[mfolder]['_TotalMaterias']=1 for comb in combinacoes: documento = monta_doc(m) # import pdb; pdb.set_trace(); materiasSolr = relacionadas(documento, comb=comb, total=int(options['recomendadas']), similaridade=eval(options['similaridade'])) recomendadas = [] recomendadas = [str(recomendada.url) for (recomendada, score) in materiasSolr] encontradas = documento['relacionadas'].intersection(recomendadas) if any(encontradas): if dict_combinacoes[mfolder].has_key(comb): dict_combinacoes[mfolder][comb]+=1 else: dict_combinacoes[mfolder][comb]=1 # print "total de acertos em %s matérias" % (contador) dict_final={} # import pdb; pdb.set_trace(); for dc in dict_combinacoes.keys(): print "\n", dc for d in sorted(dict_combinacoes[dc].keys()): print d,dict_combinacoes[dc][d] if dict_final.has_key(d): dict_final[d]+=dict_combinacoes[dc][d] else: dict_final[d]=dict_combinacoes[dc][d] print "\nTotal Geral" for f in sorted(dict_final.keys()): print f, dict_final[f]