コード例 #1
0
ファイル: library.py プロジェクト: xflows/textflows
def lbd_actual_and_predicted_values(input_dict):
    """Pair every heuristic's scores with a 0/1 vector marking the b-terms.

    Reads ``input_dict['bterms']``, ``input_dict['bow_model_constructor']``
    and ``input_dict['heuristics']``.  The mapping returned by the
    constructor's ``_vocab_to_idx()`` is used as term -> list position.

    Returns ``{'apv': [...]}`` with one dict per flattened heuristic holding
    its ``name``, its ``scores`` copied into a list (``predicted``) and the
    marker vector (``actual``).  The same ``actual`` list object is shared by
    all entries.
    """
    wanted_terms = input_dict['bterms']
    constructor = input_dict['bow_model_constructor']
    term_to_index = constructor._vocab_to_idx()

    # One slot per vocabulary entry; 1 where the term is a known b-term.
    actual = [0] * len(term_to_index)
    for term in wanted_terms:
        if term not in term_to_index:
            continue
        actual[term_to_index[term]] = 1

    apv = []
    for heuristic in flatten(input_dict['heuristics']):
        apv.append({
            'name': heuristic.name,
            'predicted': list(heuristic.scores),
            'actual': actual,
        })
    return {'apv': apv}
コード例 #2
0
def lbd_actual_and_predicted_values(input_dict):
    """Build actual-vs-predicted records for each heuristic.

    ``input_dict`` must provide ``'bterms'``, ``'bow_model_constructor'``
    (whose ``_vocab_to_idx()`` result is used as a term -> position mapping)
    and ``'heuristics'`` (passed through ``flatten``).

    The result is ``{'apv': records}`` where each record carries the
    heuristic's ``name``, ``list(scores)`` as ``predicted`` and a 0/1 list
    ``actual`` with 1 at the position of every b-term found in the
    vocabulary.  All records reference the same ``actual`` list.
    """
    bterm_list = input_dict['bterms']
    index_of = input_dict['bow_model_constructor']._vocab_to_idx()

    marks = [0] * len(index_of)
    # b-terms missing from the vocabulary are ignored.
    for candidate in bterm_list:
        if candidate in index_of:
            position = index_of[candidate]
            marks[position] = 1

    flat_heuristics = flatten(input_dict['heuristics'])

    records = []
    for entry in flat_heuristics:
        record = {}
        record['name'] = entry.name
        record['predicted'] = list(entry.scores)
        record['actual'] = marks
        records.append(record)
    return {'apv': records}
コード例 #3
0
def lbd_select_ensemble_heuristic_post(postdata, input_dict, output_dict):
    """Assemble the widget outputs after the heuristic-selection form is posted.

    The incoming ``output_dict`` argument is not used; a fresh dict is
    returned.  It contains the serialized ``adc`` and ``dataset`` inputs
    (via ``LatinoInterfaces.LatinoCF.Save(ToNetObj(...))``), the feature
    names of ``bow_model_constructor``, name/score records for every
    flattened heuristic, the ``bterms`` input, and the integer
    ``heuristic_index`` taken from ``postdata`` (``-1`` when absent).
    """
    # The value is not used below, but the lookup is kept: it fails when
    # 'widget_id' is missing from the posted data.
    widget_id = postdata.get('widget_id')[0]

    from workflows.textflows_dot_net.serialization_utils import ToNetObj
    import LatinoInterfaces

    serialized_adc = LatinoInterfaces.LatinoCF.Save(ToNetObj(input_dict['adc']))
    feature_names = input_dict['bow_model_constructor'].get_feature_names()

    score_records = []
    for heuristic in flatten(input_dict['heuristic_scores']):
        score_records.append({
            'name': heuristic.name,
            'scores': heuristic.scores.tolist(),
        })

    bterms = input_dict['bterms']
    serialized_dataset = LatinoInterfaces.LatinoCF.Save(
        ToNetObj(input_dict['dataset']))

    # Posted values arrive as lists; take the first element.
    posted_index = postdata.get('heuristic_index', [-1])
    selected_index = int(posted_index[0])

    return {
        'serialized_adc': serialized_adc,
        'vocabulary': feature_names,
        'heuristic_scores': score_records,
        'bterms': bterms,
        'serialized_dataset': serialized_dataset,
        'primary_heuristic_index': selected_index,
    }
コード例 #4
0
ファイル: library.py プロジェクト: YishuiLi/textflows
def vipercharts_prepareCurveData(input_dict): #, subtype
	"""Compute the point series used to draw performance curves.

	Reads ``input_dict['predictions']`` (passed through ``flatten``) and
	``input_dict['subtype']``.  Each element ("curve") is a dict with the
	keys ``'actual'`` and ``'predicted'`` (parallel sequences), an optional
	``'rank'`` and, for the ``'-score'`` subtype, ``'name'``.

	For every curve the function stores the computed series back into the
	curve dict under ``'ROCpoints'``, ``'PRpoints'``, ``'LIFTpoints'``,
	``'ROChull'``, ``'COSTpoints'``, ``'RATEintervals'``, ``'KENpoints'``,
	``'AUC'``, ``'Gini'``, ``'AUPR'`` and ``'AUCH'``.  The running maxima
	``'KENmax'`` and ``'RATEmax'`` are written to the first curve only.

	Returns ``{'performance': performance}`` (the same, now mutated, list).

	Raises ``Exception`` when a curve's ``'actual'`` contains no ``1`` or
	no ``0``.

	NOTE(review): the ``print`` statements use Python 2 syntax, so the
	``/`` operators on ints below presumably rely on Python 2 integer
	division -- confirm before porting.
	"""
	import math  # only referenced by the commented-out KENtau formula near the end
	nPoints=4  # number of sub-steps per interval of the rate-driven curve
	performance = flatten(input_dict['predictions'])#chartdata
	subtype = input_dict['subtype']
	# Running maxima over all curves; both start at 0.5.
	kenmax = 0.5
	ratemax = 0.5
	for curve in performance:
		n = len(curve['actual'])
		# Class totals are counted as the ints 0 and 1.
		negs = curve['actual'].count(0)
		poss = curve['actual'].count(1)
		if poss == 0 or negs == 0:
			raise Exception("Class Error, zero poss or zero negs, only one class or other type error.")
			#return []
		# NOTE(review): `ranks` is assigned here but not read anywhere in this function.
		try:
			ranks = curve['rank']
		except:
			ranks = range(n+1)[1:] # ranks from 1
		# Pair every actual label with its prediction converted to float.
		paralel =[]
		for i in range(n):
			paralel.append([curve['actual'][i], float(curve['predicted'][i])])
		if (subtype == '-score'):
			# Every series starts with a fixed initial point.
			ROCseries = [[0,0, '-Inf']]; PRseries = [[0,1, '-Inf']]; LIFTseries = [[0,0, '-Inf']]
			# NOTE(review): KENup and KENdown are assigned but not used later in this function.
			ROChull = [[0,0,'-Inf']]; COSTseries = [[0,0,'-Inf']]; RATEseries = []; KENseries = [[0,0]]; KENup=[[0,1]]; KENdown=[[0,0]]
			_oldrate = 0
			_oldloss = 0
			AUC = 0
			AUPR = 0
			# Highest prediction first.
			ranked = sorted(paralel, key = lambda pair:pair[1], reverse=True)
			print "ranked:"+curve['name']
			print "by prediction: "+str(ranked)
			print "by actual: "+str(sorted(paralel, key = lambda pair:pair[0], reverse=True))
			k = 0
			# n1, concordant_pairs and discordant_pairs only feed the commented-out KENtau formula.
			tp = 0; fp = 0; tp_old = 0; fp_old = 0; n1 = 0; concordant_pairs = 0; discordant_pairs = 0;
			# One iteration per distinct prediction value: k jumps past all items tied at the threshold.
			while k < len(ranked):
				addedconc = 0; addeddisc = 0;  # not used afterwards
				threshold = ranked[k][1];
				# Labels of all items whose prediction is at least the threshold (cumulative).
				group = [x[0] for x in ranked if x[1] >= threshold]
				tp = group.count(1)
				fp = group.count(0)
				#next k is len(group).
				ties = len(group) - k  # number of items sharing this threshold
				n1 += ties * (ties-1)/2
				concordant_pairs += tp_old * (fp - fp_old)
				discordant_pairs += fp_old * (tp - tp_old)

				# ROC point is [fp/negs, tp/poss, threshold]; AUC and AUPR accumulate trapezoids.
				ROCpoint = [fp*1.0/negs,tp*1.0/poss, threshold]
				ROCseries.append(ROCpoint)
				AUC += (ROCpoint[1] + ROCseries[-2][1]) * (ROCpoint[0] - ROCseries[-2][0]) * 0.5
				PRseries.append([tp*1.0/poss, tp*1.0/(tp+fp), threshold])
				AUPR += (PRseries[-1][1] + PRseries[-2][1]) * (PRseries[-1][0] - PRseries[-2][0]) * 0.5
				LIFTseries.append([len(group)*1.0/n, tp*1.0/poss, threshold])

				#Convex hull and lower envelope:
				# Drop hull points that share the new point's x, or whose incoming slope is
				# not greater than the slope towards the new point; COSTseries is popped in step.
				while len(ROChull)>=2 and (ROChull[-1][0]==ROCpoint[0]  or (ROChull[-2][0]!=ROChull[-1][0] and (ROChull[-1][1]-ROChull[-2][1])/(ROChull[-1][0]-ROChull[-2][0]) <= (ROCpoint[1]-ROChull[-1][1])/(ROCpoint[0]-ROChull[-1][0]))):
					ROChull.pop()
					COSTseries.pop()
				ROChull.append(ROCpoint)
				if(ROCpoint[0] != ROChull[-2][0]):
					# Cost point derived from slope/intercept of the hull segment ending at the new point.
					slope = (ROCpoint[1] - ROChull[-2][1]) / (ROCpoint[0] - ROChull[-2][0])
					intercept = ROCpoint[1] - slope * ROCpoint[0]
					COSTseries.append([1 / (slope + 1), (1 - intercept) / (1 + slope), threshold])
				else:
					# Vertical hull segment: no slope; (re)use the first cost point.
					if len(COSTseries) == 0:
						COSTseries.append([0,0,threshold])
					else:
						COSTseries[0][2] = threshold
				COSTend = 1 - ROCpoint[1]  # y value of the closing cost point appended after the loop

				#Rate driven curve:
				#The Rate driven curve is a list of intervals. Each interval is a set of points on the appropriate parabola. There are nPoints number of points 
				RATEinterval = []
				pi0 = poss * 1.0 / n  # fraction of positives
				pi1 = 1 - pi0
				_newrate = pi1*ROCpoint[0]+pi0*ROCpoint[1]
				_newloss = 2*(_newrate*(pi0-_newrate) + pi1*ROCpoint[0])
				# The interval's first point also carries the threshold and the 1-based index of the curve.
				RATEinterval.append([_oldrate, _oldloss, threshold, performance.index(curve)+1])
				# Interior points interpolate both the rate and the ROC x value between the two ends.
				for i in range(1, nPoints):
					alpha = i * 1.0/nPoints
					rate = _oldrate + alpha * (_newrate - _oldrate)
					loss = 2 * (rate * (pi0 - rate) + pi1 * (ROCseries[-2][0] + alpha * (ROCpoint[0] - ROCseries[-2][0])))
					RATEinterval.append([rate, loss, 0])
				RATEinterval.append([_newrate, _newloss, 0])
				RATEseries.append(RATEinterval)
				if _newloss > ratemax:
					ratemax = _newloss
				# NOTE(review): m is presumably where the interval's parabola peaks -- confirm.
				# Its value only counts towards ratemax when m lies strictly inside the interval.
				m = 0.5*(pi0+pi1*(ROCseries[-2][0]-ROCpoint[0])/(_newrate-_oldrate))
				if m<_newrate and m>_oldrate:
					mvalue=2*(m*(pi0-m)+pi1*((_newrate-m)*ROCseries[-2][0] + (m-_oldrate)*ROCpoint[0])/(_newrate - _oldrate))
					if mvalue > ratemax:
						ratemax = mvalue

				#Kendall curve:
				if _newrate <= pi0:
					KENseries.append([_newrate, 2*pi1*ROCpoint[0], threshold])
				else:
					if _oldrate < pi0:
						# The interval crosses pi0: insert an extra point at x == pi0 (empty threshold label).
						KENseries.append([pi0,(2*pi1*ROCpoint[0]-KENseries[-1][1])*(pi0-KENseries[-1][0])/(_newrate - KENseries[-1][0])+(KENseries[-1][1]), ''])
					KENseries.append([_newrate, 2*pi0*(1-ROCpoint[1]), threshold])
				if KENseries[-1][1] > kenmax:
					kenmax = KENseries[-1][1]
				_oldrate = _newrate
				_oldloss = _newloss

				# Advance k to the first item below the current threshold.
				k += len(group) - k
				tp_old = tp
				fp_old = fp
		else:
			# NOTE(review): this branch never initialises ROChull, COSTseries, RATEseries,
			# KENseries, AUC, AUPR or COSTend, yet they are read below and after the branch.
			# Unless an earlier curve already bound them, that looks like a NameError -- confirm.
			ROCseries = [[0,0,0]]; PRseries = [[0,1,0]];  LIFTseries = [[0,0,0]]# x: y: rank:
			# Lowest prediction first.
			ranked = sorted(paralel, key=lambda pair:pair[1])
			print ranked
			k = 0
			while k < len(ranked):
				tp = 0; fp = 0;
				threshold = ranked[k][1];
				# Labels of all items whose prediction is at most the threshold (cumulative).
				group = [x[0] for x in ranked if x[1] <= threshold]
				print group
				# NOTE(review): counts the strings '1'/'0' here, while poss/negs above count the
				# ints 1/0 -- verify which label type callers actually pass.
				tp = group.count('1')
				fp = group.count('0')
				ROCpoint = [fp*1.0/negs,tp*1.0/poss, threshold]
				ROCseries.append([fp*1.0/negs, tp*1.0/poss, int(threshold)])
				PRseries.append([tp*1.0/poss, tp*1.0/(tp+fp), int(threshold)])
				LIFTseries.append([len(group)*1.0/n, tp*1.0/poss, int(threshold)])
				# Same hull maintenance as in the '-score' branch.
				while len(ROChull)>=2 and (ROChull[-1][0]==ROCpoint[0]  or (ROChull[-2][0]!=ROChull[-1][0] and (ROChull[-1][1]-ROChull[-2][1])/(ROChull[-1][0]-ROChull[-2][0]) <= (ROCpoint[1]-ROChull[-1][1])/(ROCpoint[0]-ROChull[-1][0]))):
					ROChull.pop()
					COSTseries.pop()
				ROChull.append(ROCpoint)
				if(ROCpoint[0]!=ROChull[-2][0]):
					slope=(ROCpoint[1]-ROChull[-2][1])/(ROCpoint[0]-ROChull[-2][0])
					intercept=ROCpoint[1]-slope*ROCpoint[0]
					# Cost points in this branch have two elements (no threshold).
					COSTseries.append([1/(1+slope), (1-intercept)/(1+slope)])
				else:
					COSTseries.append([0.0, ROCpoint[0]])
				k += len(group) - k

		if COSTseries[-1][0]<1:
			# Close the cost curve at x == 1, labelled with the prediction of the last ranked item.
			COSTseries.append([1, COSTend, ranked[-1][1]])

		# Publish the computed series on the curve dict itself.
		curve['ROCpoints'] = ROCseries
		curve['PRpoints'] = PRseries
		curve['LIFTpoints'] = LIFTseries
		curve['ROChull'] = ROChull
		curve['COSTpoints'] = COSTseries
		curve['RATEintervals'] = RATEseries
		curve['KENpoints'] = KENseries
		curve['AUC'] = AUC
		curve['Gini'] = 2 * AUC - 1
		n0=n*(n-1)/2  # only used by the commented-out KENtau formula below
		#curve['KENtau'] = (concordant_pairs - discordant_pairs) / math.sqrt((n0 - n1) * (n0 - (negs*(negs-1) + poss*(poss-1))/2))
		curve['AUPR'] = AUPR
		# Area under the hull, accumulated with the trapezoid rule.
		AUCH = 0
		for i in range(1, len(ROChull)):
			AUCH += (ROChull[i][1] + ROChull[i-1][1]) * (ROChull[i][0] - ROChull[i-1][0]) * 0.5
		curve['AUCH'] = AUCH
		# The running maxima are stored on the first curve and overwritten after every curve.
		performance[0]['KENmax'] = kenmax
		performance[0]['RATEmax'] = ratemax

	output_dict = {}
	output_dict['performance'] = performance
	return output_dict
コード例 #5
0
def lbd_select_ensemble_heuristic(request, input_dict, output_dict, widget):
    """Render the heuristic-selection interaction template.

    The template context holds the ``name`` of every heuristic obtained by
    flattening ``input_dict['heuristic_scores']`` and the ``widget`` itself.
    ``output_dict`` is not used.
    """
    names = []
    for heuristic in flatten(input_dict['heuristic_scores']):
        names.append(heuristic.name)

    context = {'heuristics': names, 'widget': widget}
    template = 'interactions/lbd_select_ensemble_heuristic.html'
    return render(request, template, context)
コード例 #6
0
def lbd_heuristic_max(input_dict):
    """Wrap the flattened ``'heuristics'`` input in a ``'Max'`` specification.

    A missing ``'heuristics'`` key is treated as an empty list.  Returns
    ``{'heuristic': ('Max', flattened_heuristics)}``.
    """
    members = flatten(input_dict.get('heuristics', []))
    spec = ('Max', members)
    return dict(heuristic=spec)
コード例 #7
0
def lbd_ensemble_average_position(input_dict):
    """Wrap the flattened ``'heuristics'`` input in an ``'AvgPos'`` specification.

    ``input_dict['heuristics']`` is required.  Returns
    ``{'heuristic': ('AvgPos', flattened_heuristics)}``.
    """
    members = flatten(input_dict['heuristics'])
    spec = ('AvgPos', members)
    return dict(heuristic=spec)
コード例 #8
0
def lbd_ensemble_heuristic_vote(input_dict):
    """Wrap the flattened ``'heuristics'`` input in a ``'Vote'`` specification.

    ``input_dict['heuristics']`` is required.  Returns
    ``{'heuristic': ('Vote', flattened_heuristics)}``.
    """
    members = flatten(input_dict['heuristics'])
    spec = ('Vote', members)
    return dict(heuristic=spec)
コード例 #9
0
def lbd_heuristic_norm(input_dict):
    """Tag every flattened heuristic with ``'Norm'``.

    A missing ``'heuristics'`` key is treated as an empty list.  Returns
    ``{'norm_heuristics': [('Norm', h), ...]}`` with one pair per element
    produced by ``flatten``.
    """
    tagged = []
    for member in flatten(input_dict.get('heuristics', [])):
        tagged.append(('Norm', member))
    return {'norm_heuristics': tagged}
コード例 #10
0
def lbd_heuristic_sum(input_dict):
    """Wrap the flattened ``'heuristics'`` input in a ``'Sum'`` specification.

    A missing ``'heuristics'`` key is treated as an empty list.  Returns
    ``{'heuristic': ('Sum', flattened_heuristics)}``.
    """
    members = flatten(input_dict.get('heuristics', []))
    spec = ('Sum', members)
    return dict(heuristic=spec)
コード例 #11
0
ファイル: library.py プロジェクト: xflows/textflows
def lbd_ensemble_average_position(input_dict):
    """Build an ``'AvgPos'`` ensemble specification.

    Flattens the required ``input_dict['heuristics']`` and returns it as the
    second element of the tuple stored under the ``'heuristic'`` key.
    """
    flattened = flatten(input_dict['heuristics'])
    result = {}
    result['heuristic'] = ('AvgPos', flattened)
    return result
コード例 #12
0
ファイル: library.py プロジェクト: xflows/textflows
def lbd_ensemble_heuristic_vote(input_dict):
    """Build a ``'Vote'`` ensemble specification.

    Flattens the required ``input_dict['heuristics']`` and returns it as the
    second element of the tuple stored under the ``'heuristic'`` key.
    """
    flattened = flatten(input_dict['heuristics'])
    result = {}
    result['heuristic'] = ('Vote', flattened)
    return result
コード例 #13
0
ファイル: library.py プロジェクト: xflows/textflows
def lbd_select_ensemble_heuristic_post(postdata, input_dict, output_dict):
    """Produce the widget outputs once the selection form has been posted.

    Ignores the ``output_dict`` argument and returns a new dict with:
    ``serialized_adc`` / ``serialized_dataset`` (the ``adc`` and ``dataset``
    inputs converted with ``ToNetObj`` and saved through
    ``LatinoInterfaces.LatinoCF.Save``), ``vocabulary`` (feature names of
    ``bow_model_constructor``), ``heuristic_scores`` (name and
    ``scores.tolist()`` of each flattened heuristic), ``bterms`` and
    ``primary_heuristic_index`` (first posted ``heuristic_index`` as int,
    ``-1`` when the field is absent).
    """
    # Unused afterwards, but the subscript fails when 'widget_id' was not posted.
    widget_id = postdata.get('widget_id')[0]

    from workflows.textflows_dot_net.serialization_utils import ToNetObj
    import LatinoInterfaces

    # Values are evaluated in the order in which they are listed.
    return {
        'serialized_adc': LatinoInterfaces.LatinoCF.Save(
            ToNetObj(input_dict['adc'])),
        'vocabulary': input_dict['bow_model_constructor'].get_feature_names(),
        'heuristic_scores': [
            {'name': item.name, 'scores': item.scores.tolist()}
            for item in flatten(input_dict['heuristic_scores'])
        ],
        'bterms': input_dict['bterms'],
        'serialized_dataset': LatinoInterfaces.LatinoCF.Save(
            ToNetObj(input_dict['dataset'])),
        'primary_heuristic_index': int(
            postdata.get('heuristic_index', [-1])[0]),
    }
コード例 #14
0
ファイル: library.py プロジェクト: xflows/textflows
def lbd_heuristic_norm(input_dict):
    """Pair each flattened heuristic with the ``'Norm'`` tag.

    Uses an empty list when ``'heuristics'`` is absent from ``input_dict``.
    Returns the list of ``('Norm', heuristic)`` tuples under the
    ``'norm_heuristics'`` key.
    """
    flattened = flatten(input_dict.get('heuristics', []))
    pairs = []
    for item in flattened:
        pairs.append(('Norm', item))
    result = {}
    result['norm_heuristics'] = pairs
    return result
コード例 #15
0
ファイル: library.py プロジェクト: xflows/textflows
def lbd_heuristic_sum(input_dict):
    """Build a ``'Sum'`` heuristic specification.

    Flattens ``input_dict['heuristics']`` (an empty list when the key is
    absent) and returns it as the second element of the tuple stored under
    the ``'heuristic'`` key.
    """
    flattened = flatten(input_dict.get('heuristics', []))
    result = {}
    result['heuristic'] = ('Sum', flattened)
    return result
コード例 #16
0
ファイル: library.py プロジェクト: xflows/textflows
def lbd_heuristic_max(input_dict):
    """Build a ``'Max'`` heuristic specification.

    Flattens ``input_dict['heuristics']`` (an empty list when the key is
    absent) and returns it as the second element of the tuple stored under
    the ``'heuristic'`` key.
    """
    flattened = flatten(input_dict.get('heuristics', []))
    result = {}
    result['heuristic'] = ('Max', flattened)
    return result