Exemple #1
0
def data_for_dispute(dispute, url):
    """Build the display dictionary for a stored dispute.

    Looks up the source context of ``dispute.claimtext`` via
    ``get_dispute_context`` and combines it with the fields stored on
    ``dispute``.

    Args:
        dispute: object exposing ``claimtext``, ``matchcontext``, ``id``,
            ``score`` and ``vote`` attributes.
        url: URL of the page the dispute was matched on; returned
            unchanged under the ``'pageurl'`` key.

    Returns:
        A dict with the keys ``badvotes``, ``goodvotes``, ``claimtext``,
        ``matchcontext``, ``id``, ``score``, ``bad``, ``vote``, ``pageurl``,
        ``sourceurl``, ``sourcedomain``, ``sourcecontext``, ``sourceprefix``
        and ``displaycontext``.
    """
    claim = dispute.claimtext
    context = get_dispute_context(claim)
    bad_votes = context['badvotes']
    good_votes = context['goodvotes']

    # The stored score is used as-is (an earlier revision re-ran the SVM
    # classifier here) and is lowered by the bad/good vote ratio.
    # NOTE(review): if both vote counts are ints and this module does not
    # enable true division, "/" truncates under Python 2 -- confirm intended.
    adjusted_score = dispute.score - bad_votes / (1 + good_votes)
    if dispute.vote == "good":
        # A dispute voted "good" always gets a score of 1.
        adjusted_score = 1

    # NOTE(review): the claim text is wrapped in <b> tags without HTML
    # escaping in 'sourcecontext' -- verify the consumer escapes/sanitises it.
    return {
        'badvotes': bad_votes,
        'goodvotes': good_votes,
        'claimtext': claim,
        'matchcontext': dispute.matchcontext,
        'id': dispute.id,
        'score': adjusted_score,
        'bad': t.is_bad(claim),
        'vote': dispute.vote,
        'pageurl': url,
        'sourceurl': context['url'],
        'sourcedomain': get_domain(context['url']),
        'sourcecontext': context['text'].replace(
            claim, "<b>" + claim + "</b>"),
        'sourceprefix': context['prefix'],
        'displaycontext': make_bold_text(claim, dispute.matchcontext),
    }
Exemple #2
0
def urlcheck_real(url):
    """Compute matches for a URL and store them in the database.

    Fetches the raw disputes for ``url``, filters and de-duplicates them,
    scores each one with the SVM classifier and saves it as a
    ``SimpleMatch`` attached to the ``MatchPage`` for ``url``.

    A dispute whose classification raises is skipped: previously the
    exception was swallowed and the code went on to use ``score``, which
    was either unbound (NameError on the first dispute) or left over from
    the previous dispute.

    Args:
        url: URL of the page to check.

    Returns:
        ``urlobj.simplematch_set.all()`` for the page's ``MatchPage``.
    """
    # Function-level import kept from the original code
    # (presumably avoids a circular import -- confirm).
    from urlcheck.api import get_dispute_context

    urlobj, _created = MatchPage.objects.get_or_create(
        url=url,
        defaults={'url_hash': url_hash(url), 'loading': True})
    # Single-argument print form: same output on Python 2, valid on Python 3.
    print("get_raw_disputes: %s" % (url,))
    disputes = [d for d in basematcher.get_raw_disputes(url) if f.is_good(d)]
    disputes = [d for d in disputes if not t.is_bad(to_unicode(d[1]))]
    for dispute in remove_duplicates(disputes):
        claimtext = dispute[1]
        matchcontext = "".join(dispute[2])
        sourcecontext = get_dispute_context(claimtext)
        svmitem = {
            'claimtext': claimtext,
            'matchurl': url,
            'srcurl': sourcecontext['url'],
            'srccontext': sourcecontext['text'],
            'matchcontext': matchcontext,
        }
        print("about to compute score with libsvm")
        # Classifier calls are serialised through svmlock; the lock is
        # released in ``finally`` so it can never be left held.
        svmlock.acquire()
        try:
            # NOTE(review): ``range`` is presumably a module-level SVM
            # scaling range rather than the builtin -- confirm.
            score = features.classify_item(svmitem, model, range, mapping)
        except Exception:
            print("exception in classifier")
            # No valid score for this dispute: do not store it.
            continue
        finally:
            svmlock.release()
        print("got score")
        disputeobj = SimpleMatch(page=urlobj,
                                 claimtext=to_unicode(claimtext),
                                 score=score,
                                 matchcontext=to_unicode(matchcontext))
        try:
            disputeobj.save()
        except Exception:
            # Best effort: a failed save must not abort the remaining ones.
            print("exception saving dispute")
    urlobj.loading = False
    urlobj.save()
    return urlobj.simplematch_set.all()
Exemple #3
0
def urlcheck_real(url):
    """Compute matches for a URL and store them in the database.

    The raw disputes found for ``url`` are filtered, de-duplicated,
    scored by the SVM classifier and persisted as ``SimpleMatch`` rows
    linked to the page's ``MatchPage`` record.

    If the classifier raises for a dispute, that dispute is skipped.
    The earlier code swallowed the exception and then read ``score``
    anyway, which raised NameError on the first dispute or silently
    reused the previous dispute's score on later ones.

    Args:
        url: URL of the page to check.

    Returns:
        ``urlobj.simplematch_set.all()`` for the page's ``MatchPage``.
    """
    # Kept as a function-level import, as in the original code
    # (presumably to avoid a circular import -- confirm).
    from urlcheck.api import get_dispute_context

    urlobj, _created = MatchPage.objects.get_or_create(
        url=url,
        defaults={
            'url_hash': url_hash(url),
            'loading': True,
        })
    # One-argument print: identical output on Python 2, parses on Python 3.
    print("get_raw_disputes: %s" % (url,))
    disputes = [d for d in basematcher.get_raw_disputes(url) if f.is_good(d)]
    disputes = [d for d in disputes if not t.is_bad(to_unicode(d[1]))]
    for dispute in remove_duplicates(disputes):
        claimtext = dispute[1]
        matchcontext = "".join(dispute[2])
        sourcecontext = get_dispute_context(claimtext)
        svmitem = {
            'claimtext': claimtext,
            'matchurl': url,
            'srcurl': sourcecontext['url'],
            'srccontext': sourcecontext['text'],
            'matchcontext': matchcontext,
        }
        print("about to compute score with libsvm")
        # svmlock guards the classifier call; releasing in ``finally``
        # guarantees the lock is freed on every path.
        svmlock.acquire()
        try:
            # NOTE(review): ``range`` here is presumably a module-level SVM
            # scaling range, not the builtin -- confirm.
            score = features.classify_item(svmitem, model, range, mapping)
        except Exception:
            print("exception in classifier")
            # Without a score there is nothing meaningful to store.
            continue
        finally:
            svmlock.release()
        print("got score")
        disputeobj = SimpleMatch(page=urlobj,
                                 claimtext=to_unicode(claimtext),
                                 score=score,
                                 matchcontext=to_unicode(matchcontext))
        try:
            disputeobj.save()
        except Exception:
            # Best effort: keep processing the remaining disputes.
            print("exception saving dispute")
    urlobj.loading = False
    urlobj.save()
    return urlobj.simplematch_set.all()