Code Example #1
from datetime import datetime

import evaluation  # project-local metrics module used below
from prettytable import PrettyTable


def circle_results(file):
    t = PrettyTable(['Description', 'Total', 'True Pos.', 'False Pos.', 'True Neg.', 'False Neg.', 'Precision', 'Sensitivity',
                     'Dice Coeff.', 'Jaccard Ind.', 'Jaccard Dist.'])
    f = open(file, 'r')
    for line in f:
        totals = line.split()
        description = totals[0]
        tp = totals[1]
        fp = totals[2]
        tn = totals[3]
        fn = totals[4]
        total = int(tp) + int(fp) + int(tn) + int(fn)
        precision = evaluation.precision(tp, fp)
        sensitivity = evaluation.sensitivity(tp, fn)
        # fmeasure = evaluation.fmeasure(tp, fp, fn)
        dicecoeff = evaluation.dicecoeff(tp, fp, fn)
        jaccardindex = evaluation.jaccardindex(tp, fp, fn)
        jaccarddistance = 1 - jaccardindex
        t.add_row([description, str(total), str(tp), str(fp), str(tn), str(fn), str(precision), str(sensitivity), str(dicecoeff),
                  str(jaccardindex), str(jaccarddistance)])

    print "Circle Detection\n"
    print t

    now = "\n" + str(datetime.now()) + "\n"
    data = t.get_string()
    r = open('circle_test_results.txt', 'a')
    r.write(now)
    r.write(data)
    r.write("\n")
    r.close()
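The evaluation helpers themselves are not shown on this page. As a reference, here is a minimal sketch of count-based versions consistent with the calls above (names and signatures are inferred from the call sites, not taken from the project source; tp, fp, tn, fn may arrive as strings, as in the loop above):

# Hypothetical count-based metric helpers, inferred from the calls above.
# precision = TP/(TP+FP), sensitivity = TP/(TP+FN),
# Dice = 2*TP/(2*TP+FP+FN), Jaccard index = TP/(TP+FP+FN).
def precision(tp, fp):
    return float(tp) / (int(tp) + int(fp))

def sensitivity(tp, fn):
    return float(tp) / (int(tp) + int(fn))

def dicecoeff(tp, fp, fn):
    return 2.0 * int(tp) / (2 * int(tp) + int(fp) + int(fn))

def jaccardindex(tp, fp, fn):
    return float(tp) / (int(tp) + int(fp) + int(fn))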
Code Example #2
File: bprmf.py Project: longyahui/GCATSL
def evaluation(self, Tr_neg, Te, positions=[5, 10, 15]):
    from evaluation import precision
    from evaluation import recall
    from evaluation import nDCG
    prec = np.zeros(len(positions))
    rec = np.zeros(len(positions))
    ndcg = np.zeros(len(positions))
    map_value, auc_value = 0.0, 0.0
    for u in Te:
        val = np.dot(self.U[u, :], self.V.transpose())
        inx = Tr_neg[u]["items"]
        A = set(Te[u])
        B = set(inx) - A
        # compute precision and recall
        ii = np.argsort(val[inx])[::-1][:max(positions)]
        prec += precision(Te[u], inx[ii], positions)
        rec += recall(Te[u], inx[ii], positions)
        # ndcg += nDCGAtN(Te[u], inx[ii], 10)
        ndcg += np.array([nDCG(Te[u], inx[ii], p) for p in positions])
        # compute map and AUC
        pos_inx = np.array(list(A))
        neg_inx = np.array(list(B))
        map_value += BPR.mean_average_precision(pos_inx, neg_inx, val)
        auc_value += BPR.auc_computation(pos_inx, neg_inx, val)
    return map_value, auc_value, ndcg, prec, rec
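Note that precision and recall here must return one value per cut-off in positions so that they can be accumulated into the prec and rec arrays. A minimal sketch under that assumption (illustrative only; the project's own implementations are not shown on this page):

import numpy as np

# Hypothetical ranked-retrieval metrics: for each cut-off p in positions,
# precision@p and recall@p over the top-ranked items.
def precision(actual, ranked, positions):
    hits = np.cumsum(np.in1d(ranked, list(actual)))
    return np.array([hits[p - 1] / float(p) for p in positions])

def recall(actual, ranked, positions):
    hits = np.cumsum(np.in1d(ranked, list(actual)))
    return np.array([hits[p - 1] / float(len(actual)) for p in positions])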
Code Example #3
def predict_with_fixed_value(forward,comment,like,submission=True):
	# type check
	if not (isinstance(forward,int) and isinstance(comment,int) and isinstance(like,int)):
		raise TypeError("forward,comment,like should be type 'int' ")
	
	traindata,testdata = loadData()
	
	# score on the training set
	train_real_pred = traindata[['forward','comment','like']]
	train_real_pred['fp'],train_real_pred['cp'],train_real_pred['lp'] = forward,comment,like
	print ("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values)*100))
	
	# predict on the test data with fixed value, generate submission file
	if submission:
		test_pred = testdata[['uid','mid']]
		test_pred['fp'],test_pred['cp'],test_pred['lp'] = forward,comment,like
		
		result = []
		filename = "weibo_predict_{}_{}_{}.txt".format(forward,comment,like)
		for _,row in test_pred.iterrows():
			result.append("{0}\t{1}\t{2},{3},{4}\n".format(row[0],row[1],row[2],row[3],row[4]))
		f = open(filename,'w')
		f.writelines(result)
		f.close()
		print ('generate submission file "{}"'.format(filename))
Code Example #4
def test_octavo(
    num_points,
    classes,
    xbound,
    ybound,
    zbound,
    max_depth,
    min_node_size,
    min_loss,
    expected,
):
    xy_parent = data_for_tests.make_octavo(
        num_points, classes, xbound, ybound, zbound
    ).values
    X = xy_parent[:, :-1]
    y = xy_parent[:, -1]

    forest = random_forest.grow_random_forest(
        X, y, num_trees=20, max_features=2, min_node_size=1
    )
    predictions = random_forest.forest_predict(forest, X)
    targets = y
    tfpns = evaluation.tfpn(predictions, targets)
    cm = evaluation.make_confusion_matrix(*tfpns, percentage=True)
    result = np.array(
        [evaluation.precision(cm), evaluation.sensitivity(cm), evaluation.fpr(cm)]
    )
    expected = np.array(expected)
    assert np.any(np.abs(expected - result) < 0.01)
Code Example #5
File: adabpr.py Project: stephenliu0423/adabpr
def evaluation(self, Tr_neg, Te, positions=[5, 10, 15]):
    prec = np.zeros(len(positions))
    rec = np.zeros(len(positions))
    map_value, auc_value, ndcg = 0.0, 0.0, 0.0
    for u in Te:
        val = self.M[u, :]
        inx = Tr_neg[u]['items']
        A = set(Te[u])
        B = set(inx) - A
        # compute precision and recall
        ii = np.argsort(val[inx])[::-1][:max(positions)]
        prec += precision(Te[u], inx[ii], positions)
        rec += recall(Te[u], inx[ii], positions)
        ndcg_user = nDCG(Te[u], inx[ii], 10)
        # compute map and AUC
        pos_inx = np.array(list(A))
        neg_inx = np.array(list(B))
        map_user = mean_average_precision(pos_inx, neg_inx, val)
        auc_user = auc_computation(pos_inx, neg_inx, val)
        ndcg += ndcg_user
        map_value += map_user
        auc_value += auc_user
        # outf.write(" ".join([str(map_user), str(auc_user), str(ndcg_user)])+"\n")
    # outf.close()
    n_users = len(Te)
    return (map_value / n_users, auc_value / n_users, ndcg / n_users,
            prec / n_users, rec / n_users)
Code Example #6
def test(test_model, test_data, test_labels, show_mistake=False):
    test_predictions = test_model.predict(test_data, verbose=0)

    # PRINT WRONG PREDICTIONS
    if show_mistake:
        for i in range(len(test_predictions)):
            stress_probability = test_predictions[i][1]
            score = abs(test_labels[i][1] - stress_probability)
            if score > 0:
                seq = ""
                for j in range(len(test_data[i])):
                    seq += idx_to_word[test_data[i][j]].strip() + " "
                print(seq, ",", score, ",", test_labels[i][1],
                      stress_probability)

    # TEST PERFORMANCE
    res_accu = eval.accuracy(test_predictions, test_labels)
    res_f1 = eval.fscore(test_predictions, test_labels)
    res_recall = eval.recall(test_predictions, test_labels)
    res_precision = eval.precision(test_predictions, test_labels)
    print('Test Accuracy: %.3f' % res_accu)
    print('Test F1-score: %.3f' % res_f1)
    print('Test Recall: %.3f' % res_recall)
    print('Test Precision: %.3f' % res_precision)

    return res_accu, res_f1, res_recall, res_precision
Code Example #7
def online_evaluate(gtmat, pred):
    pred_labels = torch.argmax(pred.cpu(), dim=1).long()
    gt_labels = gtmat.view(-1).cpu().numpy()
    pred_labels = pred_labels.numpy()
    acc = accuracy(gt_labels, pred_labels)
    pre = precision(gt_labels, pred_labels)
    rec = recall(gt_labels, pred_labels)
    return acc, pre, rec
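A minimal sketch of binary label-array metrics matching the (ground truth, predictions) argument order used above (an assumption; the real helpers may also handle multi-class labels):

import numpy as np

# Hypothetical binary metrics taking (gt_labels, pred_labels), as called above.
def accuracy(gt, pred):
    return np.mean(gt == pred)

def precision(gt, pred):
    tp = np.sum((pred == 1) & (gt == 1))
    fp = np.sum((pred == 1) & (gt == 0))
    return tp / float(tp + fp) if (tp + fp) > 0 else 0.0

def recall(gt, pred):
    tp = np.sum((pred == 1) & (gt == 1))
    fn = np.sum((pred == 0) & (gt == 1))
    return tp / float(tp + fn) if (tp + fn) > 0 else 0.0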
Code Example #8
def predict_with_stat(stat="median",submission=True):
	"""
	stat:
		string
		min,max,mean,median
	"""
	stat_dic = genUidStat()

	# load the data and set up the columns
	traindata,testdata = loadData()

	# get stat for each uid
	forward,comment,like = [],[],[]
	for uid in traindata['uid']:
		if uid in stat_dic:
			forward.append(int(stat_dic[uid]["forward_"+stat]))
			comment.append(int(stat_dic[uid]["comment_"+stat]))
			like.append(int(stat_dic[uid]["like_"+stat]))
		else:
			forward.append(0)
			comment.append(0)
			like.append(0)
            
	# score on the training set
	train_real_pred = traindata[['forward','comment','like']]
	train_real_pred['fp'],train_real_pred['cp'],train_real_pred['lp'] = forward,comment,like
	print ("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values)*100))	
	#predict on the test data with fixed value, generate submission file
	
	if submission:
		test_pred = testdata[['uid','mid']]
		forward,comment,like = [],[],[]
		for uid in testdata['uid']:
			if uid in stat_dic: 
				forward.append(int(stat_dic[uid]["forward_"+stat]))
				comment.append(int(stat_dic[uid]["comment_"+stat]))
				like.append(int(stat_dic[uid]["like_"+stat]))
			else:
				forward.append(0)
				comment.append(0)
				like.append(0)
				
				
		test_pred['fp'],test_pred['cp'],test_pred['lp'] = forward,comment,like
		
		result = []
		filename = "weibo_predict_{}.txt".format(stat)

		for _,row in test_pred.iterrows():
			result.append("{0}\t{1}\t{2},{3},{4}\n".format(row[0],row[1],row[2],row[3],row[4]))
		f = open(filename,'w')
		f.writelines(result)
		f.close()
		print ('generate submission file "{}"'.format(filename))
Code Example #9
def score(uid_data,pred):
	"""
	uid_data:
		pd.DataFrame
	pred:
		list, [fp,cp,lp]
	"""
	uid_real_pred = uid_data[['forward','comment','like']]
	uid_real_pred['fp'] = pred[0]
	uid_real_pred['cp'] = pred[1]
	uid_real_pred['lp'] = pred[2]
	return precision(uid_real_pred.values)
Code Example #10
def _get_best_thres_ary(self, pd_prob_mat, gt_mat):
    thres_ary = []
    for k in range(len(self.lbs)):
        f_val_max = -np.inf
        best_thres = None
        for thres in np.arange(0., 1. + 1e-6, 0.01):
            prec = eva.precision(pd_prob_mat[:, k], gt_mat[:, k], thres)
            rec = eva.recall(pd_prob_mat[:, k], gt_mat[:, k], thres)
            f_val = eva.f_value(prec, rec)
            if f_val > f_val_max:
                f_val_max = f_val
                best_thres = thres
        thres_ary.append(best_thres)
    return thres_ary
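The eva helpers take a probability column, a ground-truth column, and a threshold. A minimal sketch consistent with those calls, taking f_value to be the F1 harmonic mean (signatures inferred from the call sites, not from the project):

import numpy as np

# Hypothetical thresholded metrics matching eva.precision / eva.recall / eva.f_value.
def precision(pd_prob, gt, thres):
    pred = (pd_prob >= thres).astype(int)
    tp = np.sum((pred == 1) & (gt == 1))
    fp = np.sum((pred == 1) & (gt == 0))
    return tp / float(tp + fp) if (tp + fp) > 0 else 0.0

def recall(pd_prob, gt, thres):
    pred = (pd_prob >= thres).astype(int)
    tp = np.sum((pred == 1) & (gt == 1))
    fn = np.sum((pred == 0) & (gt == 1))
    return tp / float(tp + fn) if (tp + fn) > 0 else 0.0

def f_value(prec, rec):
    # F1: harmonic mean of precision and recall
    return 2 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0.0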
Code Example #11
def predict_by_search(submission=True):
	traindata,testdata = loadData()
	uid_best_pred = search_all_uid()
	print "search done,now predict on traindata and testdata..."

	#predict traindata with uid's best fp,cp,lp
	forward,comment,like = [],[],[]
	for uid in traindata['uid']:
		if uid in uid_best_pred:
			forward.append(int(uid_best_pred[uid][0]))
			comment.append(int(uid_best_pred[uid][1]))
			like.append(int(uid_best_pred[uid][2]))
		else:
			forward.append(0)
			comment.append(0)
			like.append(0)
	
	#score on the traindata
	train_real_pred = traindata[['forward','comment','like']]
	train_real_pred['fp'],train_real_pred['cp'],train_real_pred['lp'] = forward,comment,like
	print "Score on the training set:{0:.2f}%".format(precision(train_real_pred.values)*100)	
	
	
	if submission:
		test_pred = testdata[['uid','mid']]
		forward,comment,like = [],[],[]
		for uid in testdata['uid']:
			if uid in uid_best_pred:
				forward.append(int(uid_best_pred[uid][0]))
				comment.append(int(uid_best_pred[uid][1]))
				like.append(int(uid_best_pred[uid][2]))
			else:
				forward.append(0)
				comment.append(0)
				like.append(0)
		test_pred['fp'],test_pred['cp'],test_pred['lp'] = forward,comment,like
		
		#generate submission file
		result = []
		filename = "weibo_predict_search.txt"
		for _,row in test_pred.iterrows():
			result.append("{0}\t{1}\t{2},{3},{4}\n".format(row[0],row[1],row[2],row[3],row[4]))
		f = open(filename,'w')
		f.writelines(result)
		f.close()
		print('generate submission file "{}"'.format(filename))
Code Example #12
def test_diagonal_ndim(num_points, dim, max_features, expected, precision_bound):
    xy_parent = data_for_tests.make_diagonal_ndim(num_points, dim).values
    X = xy_parent[:, :-1]
    y = xy_parent[:, -1]

    forest = random_forest.grow_random_forest(
        X, y, num_trees=30, max_depth=20, max_features=max_features, min_node_size=1
    )
    predictions = random_forest.forest_predict(forest, X)
    targets = y
    tfpns = evaluation.tfpn(predictions, targets)
    cm = evaluation.make_confusion_matrix(*tfpns, percentage=True)
    result = np.array(
        [evaluation.precision(cm), evaluation.sensitivity(cm), evaluation.fpr(cm)]
    )
    expected = np.array(expected)
    print(precision_bound)
    assert np.any(np.abs(expected - result) < precision_bound)
Code Example #13
def evaluate_link(class_match_set, class_nonmatch_set, true_match_set,
                  all_comparisons):
    # Linkage evaluation
    linkage_result = evaluation.confusion_matrix(class_match_set,
                                                 class_nonmatch_set,
                                                 true_match_set,
                                                 all_comparisons)

    accuracy = evaluation.accuracy(linkage_result)
    precision = evaluation.precision(linkage_result)
    recall = evaluation.recall(linkage_result)
    fmeasure = evaluation.fmeasure(linkage_result)

    print('Linkage evaluation:')
    print('  Accuracy:    %.6f' % (accuracy))
    print('  Precision:   %.6f' % (precision))
    print('  Recall:      %.6f' % (recall))
    print('  F-measure:   %.6f' % (fmeasure))
    print('')
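A minimal sketch of the linkage metric helpers, assuming evaluation.confusion_matrix returns the counts (tp, fp, fn, tn) in that order (an assumption; only the call sites appear on this page):

# Hypothetical helpers over a (tp, fp, fn, tn) tuple of counts.
def accuracy(conf):
    tp, fp, fn, tn = conf
    return float(tp + tn) / (tp + fp + fn + tn)

def precision(conf):
    tp, fp, fn, tn = conf
    return float(tp) / (tp + fp) if (tp + fp) > 0 else 0.0

def recall(conf):
    tp, fp, fn, tn = conf
    return float(tp) / (tp + fn) if (tp + fn) > 0 else 0.0

def fmeasure(conf):
    p, r = precision(conf), recall(conf)
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0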
Code Example #14
    def evaluate(self):

        if self.model_used != 'vectoriel':
            self.popup(EVALUATION_ERR_MESSAGE, EVALUATION_ERR_INFO,
                       EVALUATION_ERR_TITLE)
            self.rappelLabel.setText("Rappel :" + "-")
            self.precisionLabel.setText("Precision :" + "-")
            return

        retrived_documents = self.retrieved_doc
        pertinent_documents = [
            item.text() for item in self.collectionItems if item.checkState()
        ]

        nb_pertinent_retrived_docu = evaluation.nb_pertinent_retrived_doc(
            retrived_documents, pertinent_documents)

        rappel = precision = None
        try:
            rappel = evaluation.rappel(nb_pertinent_retrived_docu,
                                       len(pertinent_documents))
        except ZeroDivisionError:
            self.popup(NO_SELECTED_DOC_MESSAGE, NO_SELECTED_DOC_INFO,
                       NO_SELECTED_DOC_TITLE)
            self.rappelLabel.setText("Rappel :" + "-")
            self.precisionLabel.setText("Precision :" + "-")
        try:
            precision = evaluation.precision(nb_pertinent_retrived_docu,
                                             len(retrived_documents))
        except ZeroDivisionError:
            self.popup(NO_DOCUMENTS_MESSAGE, NO_DOCUMENTS_INFO,
                       NO_DOCUMENTS_TITLE)
            self.rappelLabel.setText("Rappel :" + "-")
            self.precisionLabel.setText("Precision :" + "-")

        if rappel is not None and precision is not None:
            self.rappelLabel.setText("Rappel :" + "%.2f" % rappel)
            self.precisionLabel.setText("Precision :" + "%.2f" % precision)
Code Example #15
File: model1.py Project: pinfish888/tc_weibo
l_regr.fit(X_train, l_y_train)

f_y_predict = f_regr.predict(X_test)
c_y_predict = c_regr.predict(X_test)
l_y_predict = l_regr.predict(X_test)

# The coefficients
print("Coefficients: \n")
print(f_regr.coef_)
print(c_regr.coef_)
print(l_regr.coef_)
# The mean squared error
print("Mean squared error: %.2f" % np.mean((f_y_predict - f_y_test) ** 2))
print("Mean squared error: %.2f" % np.mean((c_y_predict - c_y_test) ** 2))
print("Mean squared error: %.2f" % np.mean((l_y_predict - l_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print("Variance score: %.2f" % f_regr.score(X_test, f_y_test))
print("Variance score: %.2f" % c_regr.score(X_test, c_y_test))
print("Variance score: %.2f" % l_regr.score(X_test, l_y_test))

predict_and_real = []
for i in range(0, test_num):
    predict_and_real.append([f_y_predict[i], c_y_predict[i], l_y_predict[i], f_y_test[i], c_y_test[i], l_y_test[i]])

predict_and_real0 = []
for i in range(0, test_num):
    predict_and_real0.append([0, 0, 0, f_y_test[i], c_y_test[i], l_y_test[i]])

print "Predict: {:.2f}%".format(precision(predict_and_real))
print "Zero baseline: {:.2f}%".format(precision(predict_and_real0))
Code Example #16
                l_sum2 += lc_list[i] ** 2
            f_std = math.sqrt(f_sum2 / N - avg_map[uid][0] ** 2)
            c_std = math.sqrt(c_sum2 / N - avg_map[uid][1] ** 2)
            l_std = math.sqrt(l_sum2 / N - avg_map[uid][2] ** 2)
            std_map[uid] = [f_std, c_std, l_std]

        predict_and_real_med.append([med_map[uid][0], med_map[uid][1], med_map[uid][2], line[1], line[2], line[3]])
        predict_and_real_avg.append([avg_map[uid][0], avg_map[uid][1], avg_map[uid][2], line[1], line[2], line[3]])
        predict_and_real_std.append([med_map[uid][0], med_map[uid][1], med_map[uid][2], line[1], line[2], line[3]])
        std_thres = 5
        if std_map[uid][0] > std_thres and 0 < predict_and_real_std[-1][0]:
            predict_and_real_std[-1][0] += 5
        if std_map[uid][1] > std_thres and 0 < predict_and_real_std[-1][1]:
            predict_and_real_std[-1][1] += 5
        if std_map[uid][2] > std_thres and 0 < predict_and_real_std[-1][2]:
            predict_and_real_std[-1][2] += 5

        predict_and_real_0_baseline.append([0, 0, 0, line[1], line[2], line[3]])

        # print(predict_and_real_med[-1])

    print "Median predict: {:.2f}%".format(precision(predict_and_real_med))
    print "Average predict: {:.2f}%".format(precision(predict_and_real_avg))
    print "Median and STD predict: {:.2f}%".format(precision(predict_and_real_std))
    print "Zero baseline: {:.2f}%".format(precision(predict_and_real_0_baseline))

    cursor.close()
    cnx.close()
except mysql.connector.Error as err:
    print(err.msg)
Code Example #17
File: train.py Project: henryliuw/coad_dia
def main():
    # reading in
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir",
                        default='data/sampling',
                        help='determine the base dir of the dataset document')
    parser.add_argument("--sample_n",
                        default=1000,
                        type=int,
                        help='starting image index of preprocessing')
    parser.add_argument("--evidence_n",
                        default=20,
                        type=int,
                        help='how many top/bottom tiles to pick from')
    parser.add_argument("--repl_n",
                        default=3,
                        type=int,
                        help='how many resampled replications')
    parser.add_argument("--image_split",
                        action='store_true',
                        help='if use image_split')
    parser.add_argument("--batch_size",
                        default=50,
                        type=int,
                        help="batch size")
    parser.add_argument("--stage_two",
                        action='store_true',
                        help='if only use stage two patients')
    parser.add_argument("--changhai",
                        action='store_true',
                        help='if use additional data')
    args = parser.parse_args()

    feature_size = 32
    #gpu = "cuda:0"
    gpu = None
    # 5-folds cross validation
    dataloader = CVDataLoader(args, gpu, feature_size)

    n_epoch = 800
    lr = 0.0005
    if args.stage_two:
        weight_decay = 0.008
    else:
        weight_decay = 0.005
    manytimes_n = 8

    if not os.path.isdir('figure'):
        os.mkdir('figure')
    if not os.path.isdir(os.path.join(args.data_dir, 'model')):
        os.mkdir(os.path.join(args.data_dir, 'model'))

    acc_folds = []
    auc_folds = []
    c_index_folds = []
    f1_folds = []
    f1_folds_pos = []
    total_round = 0
    model_count = 0

    loss_function = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(0.8))

    for _ in range(manytimes_n):  # averaging
        for i in range(5):
            train_history = []
            test_history = []
            minimum_loss = None
            auc_fold = None
            acc_fold = None
            early_stop_count = 0

            model = Predictor(evidence_size=args.evidence_n,
                              layers=(100, 50, 1),
                              feature_size=feature_size)
            # model.apply(weight_init)
            if gpu:
                model = model.to(gpu)
            optimizer = torch.optim.RMSprop(model.parameters(),
                                            lr=lr,
                                            weight_decay=weight_decay)
            # optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

            dataloader.set_fold(i)
            X_test, Y_test, df_test = dataloader.get_test()
            # X_train, Y_train, df_train = dataloader.get_train()
            print('starting fold %d' % i)

            for epoch in range(n_epoch):
                #result = model(X_train)
                #loss = nn.functional.binary_cross_entropy(result, Y_train) + nn.functional.mse_loss(result, Y_train)
                # loss = nn.functional.mse_loss(result, Y_train)
                #loss.backward()
                #optimizer.step()
                #optimizer.zero_grad()

                # batch input
                for X_train_batch, Y_train_batch, df_train_batch in dataloader:
                    # print(X_train_batch.shape)
                    result = model(X_train_batch)
                    loss = loss_function(result, Y_train_batch)
                    loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()

                X_train, Y_train, df_train = X_train_batch, Y_train_batch, df_train_batch

                if epoch % 20 == 0:
                    result_test = model(X_test)
                    loss_test = loss_function(result_test, Y_test)
                    #loss_test = nn.functional.mse_loss(result_test, Y_test)
                    acc_train, acc_test = accuracy(result, Y_train), accuracy(
                        result_test, Y_test)
                    auc_train, auc_test = auc(result, Y_train), auc(
                        result_test, Y_test)
                    if args.changhai:
                        c_index_train, c_index_test = 0, 0
                    else:
                        c_index_train, c_index_test = c_index(
                            result, df_train), c_index(result_test, df_test)
                    recall_train, recall_test = recall(result,
                                                       Y_train), recall(
                                                           result_test, Y_test)
                    precision_train, precision_test = precision(
                        result, Y_train), precision(result_test, Y_test)
                    f1_train_pos, f1_test_pos = f1(result, Y_train), f1(
                        result_test, Y_test)
                    f1_train, f1_test = f1(result, Y_train,
                                           negative=True), f1(result_test,
                                                              Y_test,
                                                              negative=True)
                    train_history.append(
                        (epoch, loss, acc_train, auc_train, c_index_train))
                    test_history.append(
                        (epoch, loss_test, acc_test, auc_test, c_index_test))
                    if epoch % 40 == 0:
                        print(
                            "%s epoch:%d loss:%.3f/%.3f acc:%.3f/%.3f auc:%.3f/%.3f c_index:%.3f/%.3f recall:%.3f/%.3f prec:%.3f/%.3f f1:%.3f/%.3f f1(neg):%.3f/%.3f"
                            % (time.strftime(
                                '%m.%d %H:%M:%S', time.localtime(
                                    time.time())), epoch, loss, loss_test,
                               acc_train, acc_test, auc_train, auc_test,
                               c_index_train, c_index_test, recall_train,
                               recall_test, precision_train, precision_test,
                               f1_train_pos, f1_test_pos, f1_train, f1_test))
                    # early stop
                    if minimum_loss is None or minimum_loss * 0.995 > loss_test:
                        # if minimum_loss is None or minimum_loss > loss_test:
                        if f1_train == 0:
                            continue
                        minimum_loss = loss_test
                        auc_fold = auc_test
                        acc_fold = acc_test
                        c_index_fold = c_index_test
                        f1_fold_pos = f1_test_pos
                        f1_fold = f1_test
                        early_stop_count = 0
                    elif auc_test > auc_fold and auc_test > 0.5 and acc_test >= acc_fold:
                        minimum_loss = loss_test
                        auc_fold = auc_test
                        acc_fold = acc_test
                        c_index_fold = c_index_test
                        f1_fold_pos = f1_test_pos
                        f1_fold = f1_test
                        early_stop_count = 0
                    else:
                        early_stop_count += 1
                    if early_stop_count > 2 and epoch > 100:
                        if args.stage_two:
                            if auc_fold > 0.55:
                                print('early stop at epoch %d' % epoch)
                                break
                        elif early_stop_count > 3:
                            print('early stop at epoch %d' % epoch)
                            break
                    if epoch > 500:
                        optimizer = torch.optim.RMSprop(
                            model.parameters(),
                            lr * 0.6,
                            weight_decay=weight_decay * 1.2)

            train_history = np.array(train_history)
            test_history = np.array(test_history)
            acc_folds.append(acc_fold)
            auc_folds.append(auc_fold)
            f1_folds.append(f1_fold)
            f1_folds_pos.append(f1_fold_pos)
            c_index_folds.append(c_index_fold)
            plt.plot(train_history[:, 0], train_history[:, 1], label='train')
            plt.plot(test_history[:, 0], test_history[:, 1], label='test')
            plt.legend()
            plt.savefig('figure/sample_%d_fold%d.png' % (args.sample_n, i))
            plt.cla()
            if acc_fold > 0.7 and auc_fold > 0.6 and model_count < 10:
                model.save(args.data_dir + "/model/model_%d" % model_count)
                model_count += 1
            print("acc:%.3f\tauc:%.3f\tc_index:%.3f\tf1:%.3f" %
                  (acc_fold, auc_fold, c_index_fold, f1_fold))
            total_round += 1
            if gpu:
                del dataloader.X_train, dataloader.Y_train, dataloader.X_test, dataloader.Y_test
                del X_test, Y_test, X_train, Y_train, model, optimizer
                torch.cuda.empty_cache()

    print('CV-acc:%.3f CV-auc:%.3f CV-c-index:%.3f f1:%.3f f1(neg):%.3f' %
          (sum(acc_folds) / 5 / manytimes_n, sum(auc_folds) / 5 / manytimes_n,
           sum(c_index_folds) / 5 / manytimes_n, sum(f1_folds_pos) / 5 /
           manytimes_n, sum(f1_folds) / 5 / manytimes_n))
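Since the model is trained with nn.BCEWithLogitsLoss, result holds raw logits, so precision(result, Y_train) and the related metrics presumably threshold the logits at 0 (i.e. probability 0.5). A minimal sketch under that assumption (the project's own metric functions are not shown here):

import torch

# Hypothetical logit-based precision matching precision(result, Y) above:
# a logit > 0 corresponds to a predicted probability > 0.5.
def precision(result, y):
    pred = (result.view(-1) > 0).float()
    y = y.view(-1).float()
    tp = torch.sum((pred == 1) & (y == 1)).item()
    fp = torch.sum((pred == 1) & (y == 0)).item()
    return tp / float(tp + fp) if (tp + fp) > 0 else 0.0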
Code Example #18
File: recordLinkage.py Project: Alvin2580du/alvin_py
pq = evaluation.pairs_quality(cand_rec_id_pair_list, true_match_set)

print('Blocking evaluation:')
print('  Reduction ratio:    %.3f' % (rr))
print('  Pairs completeness: %.3f' % (pc))
print('  Pairs quality:      %.3f' % (pq))
print('')

# Linkage evaluation
#
linkage_result = evaluation.confusion_matrix(class_match_set,
                                             class_nonmatch_set,
                                             true_match_set, all_comparisons)

accuracy = evaluation.accuracy(linkage_result)
precision = evaluation.precision(linkage_result)
recall = evaluation.recall(linkage_result)
fmeasure = evaluation.fmeasure(linkage_result)

print('Linkage evaluation:')
print('  Accuracy:    %.3f' % (accuracy))
print('  Precision:   %.3f' % (precision))
print('  Recall:      %.3f' % (recall))
print('  F-measure:   %.3f' % (fmeasure))
print('')

linkage_time = loading_time + blocking_time + comparison_time + \
               classification_time
print('Total runtime required for linkage: %.3f sec' % (linkage_time))

# -----------------------------------------------------------------------------
Code Example #19
# Get cross validation accuracy for 5-fold cv
print("Ionosphere validation accuracy (default parameters):")
evaluation.cross_validation(5, ionosphere_train_features, ionosphere_train_labels, model=LogisticRegression)

# Grid search for optimal hyperparameters
print("Ionosphere grid search hyperparameters:")
ionosphere_max_val_acc, ionosphere_arg_max = evaluation.grid_search(learning_rates=lrs, epsilons=eps, lambdas=lamdas, x=ionosphere_train_features, y=ionosphere_train_labels, model=LogisticRegression)

# Accuracy on test split - train with best hyperparameters
print("Ionosphere test accuracy:")
logistic_ionosphere = LogisticRegression(ionosphere_train_features, ionosphere_train_labels)
logistic_ionosphere.fit(lr=ionosphere_arg_max[0], eps=ionosphere_arg_max[1], regularization=ionosphere_arg_max[2])
ionosphere_prediction = logistic_ionosphere.predict(ionosphere_test_features)
cm_ionosphere = evaluation.confusion_matrix(ionosphere_test_labels, ionosphere_prediction)
print("Accuracy:", evaluation.accuracy(cm_ionosphere), "Precision:", evaluation.precision(cm_ionosphere), "Recall:", evaluation.true_positive(cm_ionosphere), "F1:", evaluation.f_score(cm_ionosphere))

# 5-fold CV for naive bayes
print("Ionosphere validation accuracy (naive bayes):")
evaluation.cross_validation_naive(5, ionosphere_dataset.train_data, NaiveBayes, ionosphere_dataset.label_column, ionosphere_dataset.feature_columns)

naive_ionosphere = NaiveBayes(ionosphere_dataset.train_data, ionosphere_dataset.label_column, continuous=ionosphere_dataset.feature_columns)

print("Ionosphere test accuracy (naive bayes):")

ionosphere_pred_naive = ionosphere_dataset.test_data.apply(naive_ionosphere.predict, axis=1)
cm_ionosphere_naive = evaluation.confusion_matrix(ionosphere_test_labels, ionosphere_pred_naive.to_numpy())
print("Accuracy:", evaluation.accuracy(cm_ionosphere_naive), "Precision:", evaluation.precision(cm_ionosphere_naive), "Recall:", evaluation.true_positive(cm_ionosphere_naive), "F1:", evaluation.f_score(cm_ionosphere_naive))


# Abalone -----
Code Example #20
File: Crossdomain2.py Project: Saito2982/CrossDomain
def calculate(method):
  global dataset
  a=0.0
  b=0.0
  c=0.0
  '''
  #setting 4
  user_Mu = np.loadtxt("./genre7-9/data/d11/user.csv",delimiter=",").astype(np.int64)
  user_Mv = np.loadtxt("./genre7-9/data/d22/user.csv",delimiter=",").astype(np.int64)
  test_user = np.loadtxt("./genre7-9/data/d12/user.csv",delimiter=",").astype(np.int64)

  item_Mu = np.loadtxt("./genre7-9/data/d11/item.csv",delimiter=",").astype(np.int64)
  item_Mv = np.loadtxt("./genre7-9/data/d22/item.csv",delimiter=",").astype(np.int64)
  test_item = np.loadtxt("./genre7-9/data/d12/item.csv",delimiter=",").astype(np.int64)

  data_Mu = np.loadtxt("./genre7-9/data/d11/data.csv",delimiter=",").astype(np.int64)
  data_Mv = np.loadtxt("./genre7-9/data/d22/data.csv",delimiter=",").astype(np.int64)
  test_data = np.loadtxt("./genre7-9/data/d12/data.csv",delimiter=",").astype(np.int64)

  train_index = np.loadtxt("./genre7-9/data/d11/index.csv",delimiter=",").astype(np.int64)
  train_index2 = np.loadtxt("./genre7-9/data/d22/index.csv",delimiter=",").astype(np.int64)
  train_index3 = np.loadtxt("./genre7-9/data/d12/index.csv",delimiter=",").astype(np.int64)
  '''
  
  #setting 5
  user_Mu = np.loadtxt("./genre7-9/data/d22/user.csv",delimiter=",").astype(np.int64)
  user_Mv = np.loadtxt("./genre7-9/data/d11/user.csv",delimiter=",").astype(np.int64)
  test_user = np.loadtxt("./genre7-9/data/d21/user.csv",delimiter=",").astype(np.int64)

  item_Mu = np.loadtxt("./genre7-9/data/d22/item.csv",delimiter=",").astype(np.int64)
  item_Mv = np.loadtxt("./genre7-9/data/d11/item.csv",delimiter=",").astype(np.int64)
  test_item = np.loadtxt("./genre7-9/data/d21/item.csv",delimiter=",").astype(np.int64)

  data_Mu = np.loadtxt("./genre7-9/data/d22/data.csv",delimiter=",").astype(np.int64)
  data_Mv = np.loadtxt("./genre7-9/data/d11/data.csv",delimiter=",").astype(np.int64)
  test_data = np.loadtxt("./genre7-9/data/d21/data.csv",delimiter=",").astype(np.int64)

  train_index = np.loadtxt("./genre7-9/data/d22/index.csv",delimiter=",").astype(np.int64)
  train_index2 = np.loadtxt("./genre7-9/data/d11/index.csv",delimiter=",").astype(np.int64)
  train_index3 = np.loadtxt("./genre7-9/data/d21/index.csv",delimiter=",").astype(np.int64)
  

  for i in range(repeate):

    Mu_matrix = makeMatrix(data_Mu, train_index, user_Mu, item_Mu)
    u = learning(method, Mu_matrix, train_index, data_Mu, user_Mu, item_Mu)

    Mv_matrix = makeMatrix(data_Mv, train_index2, user_Mv, item_Mv)
    pred = learning2(method, Mv_matrix, train_index2, data_Mv, user_Mv, item_Mv, u)

    test_matrix = makeMatrix(test_data, train_index3, test_user, test_item)
    test_users = users_in_testdata(3, test_matrix, test_user)

    # calculating precision, recall, and nDCG using "pred"
    pre, rec = ev.precision(3, pred, np.array(test_matrix.todense()), test_user, test_item)
    dcg = ev.nDCG(3, pred, np.array(test_matrix.todense()), test_user, test_item)

    # print the per-user averages for this run to standard output
    print("Process ID : " + str(os.getpid()))
    print("Repeat : " + str(i + 1))
    #print("K-fold crossvalidation : " + str(j + 1) + "/" + str(sepalate))
    print("Dataset : " + dataset)
    print("Mthod : " + method)
    print("Precision : " + str(np.mean(pre[test_users.nonzero()])))
    print("Recall : " + str(np.mean(rec[test_users.nonzero()])))
    print("nDCG : " + str(np.mean(dcg[test_users.nonzero()])))
    print("=================================================================================================")

    a += np.mean(pre[test_users.nonzero()])
    b += np.mean(rec[test_users.nonzero()])
    c += np.mean(dcg[test_users.nonzero()])
    # garbage collection; the numpy arrays are no longer used hereafter
    #del pred
    #del test_matrix
    #del train_matrix
    gc.collect()
    #np.save("result/" + dataset + "/" + method + "/Precision.npy", precision)
    #np.save("result/" + dataset + "/" + method + "/Recall.npy", recall)
    #np.save("result/" + dataset + "/" + method + "/nDCG.npy", nDCG)
  print("Precision AVE : " + str(a / 10))
  print("Recall AVE : " + str(b / 10))
  print("nDCG AVE : " + str(c / 10))
Code Example #21
    def get_stats_from_prob_mat(self, pd_prob_mat, gt_mat, thres_ary):
        """Get stats from prob_mat and ground truth mat. 
        
        Args:
          pd_prob_mat: ndarray, (n_clips, n_labels)
          gt_prob_mat: ndarray, (n_clips, n_labels)
          thres_ary: list of float | 'auto' | 'no_need'. 
          
        Returns:
          stat. 
        """
        n_lbs = len(self.lbs)

        stat = {}
        if type(thres_ary) is list:
            stat['thres_ary'] = thres_ary
        elif thres_ary == 'auto':
            thres_ary = self._get_best_thres_ary(pd_prob_mat, gt_mat)
            stat['thres_ary'] = thres_ary
        elif thres_ary == 'no_need':
            thres_ary = [0.5] * len(self.lbs)
            stat['thres_ary'] = ['no_need'] * len(self.lbs)
        else:
            raise Exception("thres_ary type wrong!")

        pd_digit_mat = self._get_digit_mat_from_thres_ary(
            pd_prob_mat, thres_ary)

        # overall stat
        eer = eva.eer(pd_prob_mat.flatten(), gt_mat.flatten())
        auc = eva.roc_auc(pd_prob_mat.flatten(), gt_mat.flatten())
        (tp, fn, fp, tn) = eva.tp_fn_fp_tn(pd_digit_mat, gt_mat, 0.5)
        prec = eva.precision(pd_digit_mat, gt_mat, 0.5)
        rec = eva.recall(pd_digit_mat, gt_mat, 0.5)
        f_val = eva.f_value(prec, rec)
        stat['overall'] = {
            'tp': tp,
            'fn': fn,
            'fp': fp,
            'tn': tn,
            'precision': prec,
            'recall': rec,
            'f_value': f_val,
            'eer': eer,
            'auc': auc
        }

        # element-wise stat
        stat['event_wise'] = {}
        for k in range(len(self.lbs)):
            eer = eva.eer(pd_prob_mat[:, k], gt_mat[:, k])
            auc = eva.roc_auc(pd_prob_mat[:, k], gt_mat[:, k])
            (tp, fn, fp, tn) = eva.tp_fn_fp_tn(pd_digit_mat[:, k],
                                               gt_mat[:, k], 0.5)
            prec = eva.precision(pd_digit_mat[:, k], gt_mat[:, k], 0.5)
            rec = eva.recall(pd_digit_mat[:, k], gt_mat[:, k], 0.5)
            f_val = eva.f_value(prec, rec)
            stat['event_wise'][self.lbs[k]] = {
                'tp': tp,
                'fn': fn,
                'fp': fp,
                'tn': tn,
                'precision': prec,
                'recall': rec,
                'f_value': f_val,
                'eer': eer,
                'auc': auc
            }
        return stat
Code Example #22
def evaluate_boxes(dataset, all_boxes, output_dir, num_classes=11):
    """
    :param dataset: dataset in coco format
    :param all_boxes: detection boxes
    :param output_dir: dir to save eval.json
    :return: res_dict = {thrs_0: {'TP': x0, 'FP': y0, 'FN': z0}, ... , thrs_N: {'TP': xN, 'FP': yN, 'FN': zN}}
    """
    
    res = []
    
    gt = get_gt(dataset)
    
    imgs = dataset.COCO.imgs
    
    num_imgs = len(imgs)
    
    thrs = [0.5, 0.3]     
         
    ev_json = EvalSave(thrs)
    ev_json._set_dataset_name(dataset.name)
        
    res_dict = {}
    for val in thrs:
        res_dict[val] = {'TP': 0, 'FP': 0, 'FN': 0}
    
    l_limit = 2500
    u_limit = 50 * 10**3
        
    # create confusion matrix only for 10c datasets
    calc_cfn_matrix = "10c" in dataset.name
    
    for t in thrs:  
        # initialize caches for the different tp, fp, fn and m_iou values
        tp_sum = [0] * int((1/ev_json._conf_stride + 1))
        fp_sum = [0] * int((1/ev_json._conf_stride + 1))
        fn_sum = [0] * int((1/ev_json._conf_stride + 1))        
        m_iou_sum = [0] * int((1/ev_json._conf_stride + 1))

        tp_sum_low = [0] * int((1/ev_json._conf_stride + 1))
        fp_sum_low = [0] * int((1/ev_json._conf_stride + 1))
        fn_sum_low = [0] * int((1/ev_json._conf_stride + 1))        
        m_iou_sum_low = [0] * int((1/ev_json._conf_stride + 1))
        
        tp_sum_mid = [0] * int((1/ev_json._conf_stride + 1))
        fp_sum_mid = [0] * int((1/ev_json._conf_stride + 1))
        fn_sum_mid = [0] * int((1/ev_json._conf_stride + 1))        
        m_iou_sum_mid = [0] * int((1/ev_json._conf_stride + 1))

        tp_sum_high = [0] * int((1/ev_json._conf_stride + 1))
        fp_sum_high = [0] * int((1/ev_json._conf_stride + 1))
        fn_sum_high = [0] * int((1/ev_json._conf_stride + 1))        
        m_iou_sum_high = [0] * int((1/ev_json._conf_stride + 1))
        
        conf_mat = [np.zeros((num_classes, num_classes)).astype(np.uint32),
                     np.zeros((num_classes, num_classes)).astype(np.uint32)]
        
        classification_error = [np.array([0]), np.array([0])]
        
        for im in range(num_imgs):
            sys.stdout.write("\rimage {:d} of {:d} - thrs = {:0.1f}".format(im+1, num_imgs, t))
            # sort out empty boxes
            n_e_boxes = []
            for idx, b in enumerate(all_boxes[1:]):
                if b[im].any():                    
                    for i in range(len(b[im])):       
                        n_e_boxes.append([b[im][i].tolist(), idx+1])
            
            boxes = get_image_boxes(dataset, all_boxes, im)
            
            for box in n_e_boxes:
                box[0][2] = box[0][2] - box[0][0]
                box[0][3] = box[0][3] - box[0][1]
                                    
            r_end =  int((1 + ev_json._conf_stride) / ev_json._conf_stride)
            conf = [c*ev_json._conf_stride for c in range(int(1/ev_json._conf_stride) + 1)]
                        
            # as coco forces you to provide a label, empty images are detected by bounding boxes containing only a label
            # validation case: empty frames were left empty and didn't get a label
            if not gt[im]:
                gt_ev = []                
            else:
                # test case: there's no bounding box but a label to tell the network that there's no object
                if len(gt[im][0]) == 1:
                    gt_ev = []
                else:
                    gt_ev = gt[im]
                    
                
            # prepare for size-dependent evaluation
            gt_low = []
            gt_mid = []
            gt_high = []
            for g in gt_ev:
                if g[2]*g[3] <= l_limit:
                    gt_low.append(g)
                elif g[2]*g[3] > u_limit:
                    gt_high.append(g)
                else:
                    gt_mid.append(g)
            
            # TODO(eomoos): change cache arrays type to numpy array so you can easily add cache to target array!
            # eval complete dataset
            if calc_cfn_matrix:
                tp_c, fp_c, fn_c, m_iou, max_conf = evaluation.eval_with_conf(n_e_boxes, gt_ev, t, conf, calc_conf_mat=calc_cfn_matrix, conf_mat=conf_mat, correct_pred=classification_error)
            else:
                tp_c, fp_c, fn_c, m_iou, max_conf = evaluation.eval_with_conf(n_e_boxes, gt_ev, t, conf)
            # eval only with tiny objects - A <= 2500
            tp_c_low, fp_c_low, fn_c_low, m_iou_low, _ = evaluation.eval_with_conf(n_e_boxes, gt_low, t, conf)
            # eval only with medium size objects - 2500 < A < 50,000
            tp_c_mid, fp_c_mid, fn_c_mid, m_iou_mid, _ = evaluation.eval_with_conf(n_e_boxes, gt_mid, t, conf)
            # eval only with huge objects - A >= 50000
            tp_c_high, fp_c_high, fn_c_high, m_iou_high, _ = evaluation.eval_with_conf(n_e_boxes, gt_high, t, conf)
            
            # calculate sums of different size tp(conf), fp(conf), fn(conf), m_iou(conf)
            tp_sum = [tp_sum[i] + tp_c[i] for i in range(len(tp_c))]
            fp_sum = [fp_sum[i] + fp_c[i] for i in range(len(fp_c))]
            fn_sum = [fn_sum[i] + fn_c[i] for i in range(len(fn_c))]
            m_iou_sum = [m_iou_sum[i] + m_iou[i] for i in range(len(m_iou))]

            tp_sum_low = [tp_sum_low[i] + tp_c_low[i] for i in range(len(tp_c_low))]
            fn_sum_low = [fn_sum_low[i] + fn_c_low[i] for i in range(len(fn_c_low))]
            m_iou_sum_low = [m_iou_sum_low[i] + m_iou_low[i] for i in range(len(m_iou_low))]
            
            tp_sum_mid = [tp_sum_mid[i] + tp_c_mid[i] for i in range(len(tp_c_mid))]
            fn_sum_mid = [fn_sum_mid[i] + fn_c_mid[i] for i in range(len(fn_c_mid))]
            m_iou_sum_mid = [m_iou_sum_mid[i] + m_iou_mid[i] for i in range(len(m_iou_mid))]
            
            tp_sum_high = [tp_sum_high[i] + tp_c_high[i] for i in range(len(tp_c_high))]
            fn_sum_high = [fn_sum_high[i] + fn_c_high[i] for i in range(len(fn_c_high))]
            m_iou_sum_high = [m_iou_sum_high[i] + m_iou_high[i] for i in range(len(m_iou_high))]
            
        sys.stdout.write("\n")                
        add_to_res_dict(res_dict, tp_sum[0], fp_sum[0], fn_sum[0], t)
        
        
        # calc mean iou's for different evaluation scales             
        for i in range(len(tp_sum)):        
            if tp_sum[i] > 0:
                ev_json._eval['m_iou'][t].append(m_iou_sum[i]/tp_sum[i]) 
            else:
                ev_json._eval['m_iou'][t].append(0)
        
        for i in range(len(tp_sum_low)):        
            if tp_sum_low[i] > 0:
                ev_json._eval['m_iou_low'][t].append(m_iou_sum_low[i]/tp_sum_low[i]) 
            else:
                ev_json._eval['m_iou_low'][t].append(0)
        
        for i in range(len(tp_sum_mid)):        
            if tp_sum_mid[i] > 0:
                ev_json._eval['m_iou_mid'][t].append(m_iou_sum_mid[i]/tp_sum_mid[i]) 
            else:
                ev_json._eval['m_iou_mid'][t].append(0)
                
        for i in range(len(tp_sum_high)):        
            if tp_sum_high[i] > 0:
                ev_json._eval['m_iou_high'][t].append(m_iou_sum_high[i]/tp_sum_high[i]) 
            else:
                ev_json._eval['m_iou_high'][t].append(0)
        
        for i in range(len(tp_sum)):            
            ev_json._eval['prec'][t].append(evaluation.precision(tp_sum[i], fp_sum[i]))
            ev_json._eval['rec'][t].append(evaluation.recall(tp_sum[i], fn_sum[i]))
            
            ev_json._eval['rec_low'][t].append(evaluation.recall(tp_sum_low[i], fn_sum_low[i]))

            ev_json._eval['rec_mid'][t].append(evaluation.recall(tp_sum_mid[i], fn_sum_mid[i]))

            ev_json._eval['rec_high'][t].append(evaluation.recall(tp_sum_high[i], fn_sum_high[i]))
            
        flat_conf_mat = conf_mat[0].flatten()
        num_tp_dets = sum(flat_conf_mat.tolist())
        cls_acc = evaluation.classification_accuracy(classification_error[0][0], num_tp_dets)
        print(cls_acc)
        
        ev_json._eval['conf_mat'][t].append([flat_conf_mat.tolist(), num_classes])
        ev_json._eval['classification_error'][t].append(
            [cls_acc])
        
        flat_conf_mat = conf_mat[1].flatten()
        num_tp_dets = sum(flat_conf_mat.tolist())
        cls_acc = evaluation.classification_accuracy(classification_error[1][0], num_tp_dets)
        print(cls_acc)
        
        ev_json._eval['conf_mat'][t].append([flat_conf_mat.tolist(), num_classes])
        ev_json._eval['classification_error'][t].append(
            [cls_acc])
            
    ev_json._max_conf = max_conf
    
    ev_json._save_as_json(output_dir)
    
    # print(np.array(ev_json._eval['conf_mat'][0.3][0][0]).reshape((num_classes, num_classes)))
    
    return res_dict
Code Example #23
def run_scheme(scheme, descriptor_type, descriptor_param, num_clusters,
               clf_params, plotGraphs, PCAon, num_cols):
    print "Running scheme with the following parameters: "
    print "Scheme num: " + str(scheme) + ", BoVW: num_clusters=" + str(num_clusters) +\
        "; SVM: params:" + str(clf_params) + ";\n plotGraphs=" + str(plotGraphs) +\
          "; PCA_on=" + str(PCAon)
    start = time.time()

    # 1) Read the train and test files
    train_images_filenames = cPickle.load(
        open('train_images_filenames.dat', 'r'))
    test_images_filenames = cPickle.load(open('test_images_filenames.dat',
                                              'r'))
    train_labels = cPickle.load(open('train_labels.dat', 'r'))
    test_labels = cPickle.load(open('test_labels.dat', 'r'))
    print('Loaded ' + str(len(train_images_filenames)) +
          ' training images filenames with classes ' + str(set(train_labels)))
    print('Loaded ' + str(len(test_images_filenames)) +
          ' testing images filenames with classes ' + str(set(test_labels)))

    # 2) Extract features (train)
    D, Train_descriptors, kpt_dense, pca_train, sclr_train = computeTraining_descriptors(
        descriptor_type, descriptor_param, train_images_filenames,
        train_labels, PCAon, num_cols)

    # 3) Reduce number of features by PCA (reducing m=128 cols)
    #   Computed internally in computeTraining_descriptors()
    # 4) Compute codebook
    codebook = computeCodebook(num_clusters, D, descriptor_type,
                               descriptor_param, PCAon)
    # 5) Get training BoVW
    train_VW = getBoVW_train(codebook, num_clusters, Train_descriptors)

    # 6) Train SVM
    clf, train_scaler, D_scaled = clf_train(train_VW, train_labels, clf_params)

    # 7) Get test BoVW
    test_VW = getBoVW_test(codebook, num_clusters, test_images_filenames,
                           descriptor_type, descriptor_param, kpt_dense, PCAon,
                           pca_train, sclr_train)

    # 8) Get evaluation (accuracy, f-score, graphs, etc.)
    predictions = clf_predict(clf, clf_params, train_scaler, test_VW, D_scaled)
    # Get metrics and graphs:
    # We need to implement our own for latter integration with the rest of the project
    # Accuracy, F-score (multi-class=> average? add up?)

    acc = accuracy(test_labels, predictions)
    prec = precision(test_labels, predictions)
    rec = recall(test_labels, predictions)
    f1sc = f1score(test_labels, predictions)
    cm = confusionMatrix(test_labels, predictions)
    hits, misses = HitsAndMisses(cm)
    print "Confusion matrix:\n"
    print(str(cm))
    print("\n")
    print "Results (metrics):\n" + "Accuracy= {:04.2f}%\n" \
                                   "Precision= {:04.2f}%\n" \
                                   "Recall= {:04.2f}%\n" \
                                   "F1-score= {:04.2f}%\n" \
                                   "Hits(TP)={:d}\n" \
                                   "Misses(FN)={:d}\n".format(
        100*acc, 100*prec, 100*rec, 100*f1sc, hits, misses)
    print("\n")
    if plotGraphs:
        # Plot confusion matrix (and any other graph)
        print "Plotting confusion matrix..."
        plotConfusionMatrix(cm, test_labels)

    end = time.time()
    print('Everything done in ' + str(end - start) + ' secs.')
Code Example #24
forest = random_forest.grow_random_forest(
    X, y, num_trees=30, max_depth=20, max_features=max_features, min_node_size=1
)

# make predictions
predictions = random_forest.forest_predict(forest, X)

# calculate the numbers of true positives, false positives, true negatives, false negatives
tfpns = evaluation.tfpn(predictions, y)

# calculate the confusion matrix
cm = evaluation.make_confusion_matrix(*tfpns, percentage=True)

# calculate metrics: precision, sensitivity, false-positive-rate
metrics = np.array(
    [evaluation.precision(cm), evaluation.sensitivity(cm), evaluation.fpr(cm)]
)

print(
f"{num_points} points are randomly generated in the unit cube in {dim}-dimensions.\n \
Those with the sum of coordinates >= {dim}/2 are labeled 1, \n those below are \
labeled 0."
)
print("The model achieves the following in sample metrics:")
print("precision:", metrics[0])
print("sensitivity:", metrics[1])
print("false-positive-rate:", metrics[2])
print('If the metrics are not 1,1,0, then there is a problem.')
# if (metrics[0] == 1) & (metrics[1] == 1) & (metrics[2] == 0):
# 	print(0)
# else:
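A minimal sketch of the evaluation chain used in these random-forest examples, assuming tfpn returns (tp, fp, tn, fn) and that the confusion matrix is laid out as [[tp, fp], [fn, tn]] (both layouts are assumptions; only the call sites appear on this page):

import numpy as np

# Hypothetical counterparts of evaluation.tfpn, make_confusion_matrix,
# precision, sensitivity and fpr, with assumed argument order and layout.
def tfpn(predictions, targets):
    tp = np.sum((predictions == 1) & (targets == 1))
    fp = np.sum((predictions == 1) & (targets == 0))
    tn = np.sum((predictions == 0) & (targets == 0))
    fn = np.sum((predictions == 0) & (targets == 1))
    return tp, fp, tn, fn

def make_confusion_matrix(tp, fp, tn, fn, percentage=False):
    cm = np.array([[tp, fp], [fn, tn]], dtype=float)
    return cm / cm.sum() if percentage else cm

def precision(cm):
    return cm[0, 0] / (cm[0, 0] + cm[0, 1])   # TP / (TP + FP)

def sensitivity(cm):
    return cm[0, 0] / (cm[0, 0] + cm[1, 0])   # TP / (TP + FN)

def fpr(cm):
    return cm[0, 1] / (cm[0, 1] + cm[1, 1])   # FP / (FP + TN)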
Code Example #25
File: train.py Project: shuishen112/CIKM2018_QMWFLM
def test_pair_wise(dns=FLAGS.dns):
    train, test, dev = load(FLAGS.data, filter=FLAGS.clean)
    test = test.reindex(np.random.permutation(test.index))

    q_max_sent_length = max(
        map(lambda x: len(x), train['question'].str.split()))
    a_max_sent_length = max(map(lambda x: len(x), train['answer'].str.split()))
    print('q_question_length:{} a_question_length:{}'.format(
        q_max_sent_length, a_max_sent_length))
    print('train question unique:{}'.format(len(train['question'].unique())))
    print('train length', len(train))
    print('test length', len(test))
    print('dev length', len(dev))
    alphabet, embeddings = prepare([train, test, dev],
                                   dim=FLAGS.embedding_dim,
                                   is_embedding_needed=True,
                                   fresh=FLAGS.fresh)
    # alphabet,embeddings = prepare_300([train,test,dev])
    print('alphabet:', len(alphabet))
    with tf.Graph().as_default(), tf.device("/gpu:" + str(FLAGS.gpu)):
        # with tf.device("/cpu:0"):
        session_conf = tf.ConfigProto()
        session_conf.allow_soft_placement = FLAGS.allow_soft_placement
        session_conf.log_device_placement = FLAGS.log_device_placement
        session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        with sess.as_default(), open(precision, "w") as log:
            log.write(str(FLAGS.__flags) + '\n')
            folder = 'runs/' + timeDay + '/' + timeStamp + '/'
            out_dir = folder + FLAGS.data
            if not os.path.exists(folder):
                os.makedirs(folder)
            # train,test,dev = load("trec",filter=True)
            # alphabet,embeddings = prepare([train,test,dev],is_embedding_needed = True)
            print("start build model")
            cnn = QA_CNN_quantum_extend(
                max_input_left=q_max_sent_length,
                max_input_right=a_max_sent_length,
                batch_size=FLAGS.batch_size,
                vocab_size=len(alphabet),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                dropout_keep_prob=FLAGS.dropout_keep_prob,
                embeddings=embeddings,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
                overlap_needed=FLAGS.overlap_needed,
                learning_rate=FLAGS.learning_rate,
                trainable=FLAGS.trainable,
                extend_feature_dim=FLAGS.extend_feature_dim,
                pooling=FLAGS.pooling,
                position_needed=FLAGS.position_needed,
                conv=FLAGS.conv,
                margin=FLAGS.margin)
            cnn.build_graph()

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=20)
            train_writer = tf.summary.FileWriter(log_dir + '/train',
                                                 sess.graph)
            test_writer = tf.summary.FileWriter(log_dir + '/test')
            # Initialize all variables
            print("build over")
            sess.run(tf.global_variables_initializer())
            print("variables_initializer")

            # saver.restore(sess, 'runs/20170910/20170910154937/wiki')
            map_max = 0.65
            for i in range(FLAGS.num_epochs):

                datas = batch_gen_with_pair(train,
                                            alphabet,
                                            FLAGS.batch_size,
                                            q_len=q_max_sent_length,
                                            a_len=a_max_sent_length,
                                            fresh=FLAGS.fresh,
                                            overlap_dict=None)
                print("load data")
                for data in datas:
                    feed_dict = {
                        cnn.question: data[0],
                        cnn.answer: data[1],
                        cnn.answer_negative: data[2],
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, summary, step, loss, accuracy, score12, score13, see = sess.run(
                        [
                            cnn.train_op, cnn.merged, cnn.global_step,
                            cnn.loss, cnn.accuracy, cnn.score12, cnn.score13,
                            cnn.see
                        ], feed_dict)

                    train_writer.add_summary(summary, i)
                    time_str = datetime.datetime.now().isoformat()
                    line = "{}: step {}, loss {:g}, acc {:g}, positive {:g}, negative {:g}".format(
                        time_str, step, loss, accuracy, np.mean(score12),
                        np.mean(score13))
                    print(line)
                    # print loss
                if i % 1 == 0:
                    predicted_dev = predict(sess, cnn, dev, alphabet,
                                            FLAGS.batch_size,
                                            q_max_sent_length,
                                            a_max_sent_length)
                    map_mrr_dev = evaluation.evaluationBypandas(
                        dev, predicted_dev)
                    predicted_test = predict(sess, cnn, test, alphabet,
                                             FLAGS.batch_size,
                                             q_max_sent_length,
                                             a_max_sent_length)
                    map_mrr_test = evaluation.evaluationBypandas(
                        test, predicted_test)

                    precise_test = evaluation.precision(test, predicted_test)

                    print("test precise : {}".format(precise_test))
                    print("{}:epoch:dev map mrr {}".format(i, map_mrr_dev))
                    print("{}:epoch:test map mrr {}".format(i, map_mrr_test))
                    line = " {}:epoch: precise: {}--- map_dev{}-------map_mrr_test{}".format(
                        i, precise_test, map_mrr_dev[0], map_mrr_test)
                    if map_mrr_dev[0] > map_max:
                        map_max = map_mrr_dev[0]

                        save_path = saver.save(sess, out_dir)
                        print("Model saved in file: ", save_path)

                log.write(line + '\n')
                log.flush()
            print('train over')
            saver.restore(sess, out_dir)
            predicted = predict(sess, cnn, train, alphabet, FLAGS.batch_size,
                                q_max_sent_length, a_max_sent_length)
            train['predicted'] = predicted
            map_mrr_train = evaluation.evaluationBypandas(train, predicted)
            predicted_dev = predict(sess, cnn, dev, alphabet, FLAGS.batch_size,
                                    q_max_sent_length, a_max_sent_length)
            dev['predicted'] = predicted_dev
            map_mrr_dev = evaluation.evaluationBypandas(dev, predicted_dev)
            predicted_test = predict(sess, cnn, test, alphabet,
                                     FLAGS.batch_size, q_max_sent_length,
                                     a_max_sent_length)
            test['predicted'] = predicted_test
            map_mrr_test = evaluation.evaluationBypandas(test, predicted_test)

            ap = evaluation.get_ap(test, predicted_test)
            ap.to_csv('ap_score_qlm_wiki', header=None, sep='\t')
            print('map_mrr train', map_mrr_train)
            print('map_mrr dev', map_mrr_dev)
            print('map_mrr test', map_mrr_test)
            log.write(str(map_mrr_train) + '\n')
            log.write(str(map_mrr_test) + '\n')
            log.write(str(map_mrr_dev) + '\n')
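For reference, `evaluation.evaluationBypandas` above is assumed to return a (MAP, MRR) pair computed per question. A minimal sketch under that assumption; the 'question' column name comes from this snippet, while 'flag' as the 0/1 label column is an assumption:

import numpy as np

def map_mrr_by_pandas(df, predicted):
    # One row per (question, answer) pair; 'flag' is the assumed 0/1 relevance label.
    df = df.copy()
    df["score"] = predicted
    aps, rrs = [], []
    for _, group in df.groupby("question"):
        labels = group.sort_values("score", ascending=False)["flag"].to_numpy()
        ranks = np.flatnonzero(labels == 1) + 1  # 1-based ranks of the relevant answers
        if len(ranks) == 0:
            continue  # questions with no relevant answer are skipped
        aps.append(np.mean((np.arange(len(ranks)) + 1) / ranks))
        rrs.append(1.0 / ranks[0])
    return float(np.mean(aps)), float(np.mean(rrs))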
Code example #29
0
def calculate(method):
    global dataset
    a = 0.0
    b = 0.0
    c = 0.0
    c_pre = np.array([0., 0., 0., 0., 0., 0., 0.])
    c_rec = np.array([0., 0., 0., 0., 0., 0., 0.])
    c_dcg = np.array([0., 0., 0., 0., 0., 0., 0.])

    set_data = sys.argv[1]
    setting = sys.argv[2]

    if setting == "1":
        user_list = np.loadtxt("./genre" + set_data + "/data/d11/user.csv",
                               delimiter=",").astype(np.int64)  #U1

        item_list = np.loadtxt("./genre" + set_data + "/data/d11/item.csv",
                               delimiter=",").astype(np.int64)  #genre

        data = np.loadtxt("./genre" + set_data + "/data/d11/data.csv",
                          delimiter=",").astype(np.int64)
    elif setting == "2":
        user_list = np.loadtxt("./genre" + set_data + "/data/d22/user.csv",
                               delimiter=",").astype(np.int64)  #U1

        item_list = np.loadtxt("./genre" + set_data + "/data/d22/item.csv",
                               delimiter=",").astype(np.int64)  #genre

        data = np.loadtxt("./genre" + set_data + "/data/d22/data.csv",
                          delimiter=",").astype(np.int64)
    elif setting == "3":
        user_list = np.loadtxt("./genre" + set_data + "/data/d1/user.csv",
                               delimiter=",").astype(np.int64)  #U1

        item_list = np.loadtxt("./genre" + set_data + "/data/d1/item.csv",
                               delimiter=",").astype(np.int64)  #genre

        data = np.loadtxt("./genre" + set_data + "/data/d1/data.csv",
                          delimiter=",").astype(np.int64)

    precision = np.zeros((repeate, sepalate, len(user_list)))
    recall = np.zeros((repeate, sepalate, len(user_list)))
    nDCG = np.zeros((repeate, sepalate, len(user_list)))

    # repeated K-fold cross-validation
    for i in range(repeate):

        # kf: K-fold cross-validator from scikit-learn.
        # Shuffle when splitting into train/test; the random state changes per repeat.
        kf = KFold(n_splits=sepalate, random_state=i, shuffle=True)

        j = 0
        for train_index, test_index in kf.split(data):
            # make train and test matrix
            train_matrix = makeMatrix(data, train_index, user_list, item_list)
            test_matrix = makeMatrix(data, test_index, user_list, item_list)
            test_users = users_in_testdata(3, test_matrix, user_list)
            # learning model and calculate predicted user-item matrix
            pred = learning(method, train_matrix, train_index, data, user_list,
                            item_list)
            # np.save("./pred_temp.npy", pred)

            count_dict = collections.Counter(data[:, 1])

            # calculating precision, recall, and nDCG using "pred"
            pre, rec, new_c_pre, new_c_rec, recom, recom2 = ev.precision(
                3, pred, np.array(test_matrix.todense()), user_list, item_list,
                count_dict)
            dcg, new_c_dcg = ev.nDCG(3, pred, np.array(test_matrix.todense()),
                                     user_list, item_list)
            # save users' values for each criterion
            precision[i, j, :] = pre
            recall[i, j, :] = rec
            nDCG[i, j, :] = dcg
            c_pre = c_pre + new_c_pre
            c_rec = c_rec + new_c_rec
            c_dcg = c_dcg + new_c_dcg

            # print the per-user averages to standard output
            print("Process ID : " + str(os.getpid()))
            print("Repeat : " + str(i + 1))
            print("K-fold crossvalidation : " + str(j + 1) + "/" +
                  str(sepalate))
            print("Dataset : " + dataset)
            print("Mthod : " + method)
            print("Precision : " + str(np.mean(pre[test_users.nonzero()])))
            print("Recall : " + str(np.mean(rec[test_users.nonzero()])))
            print("nDCG : " + str(np.mean(dcg[test_users.nonzero()])))
            print(
                "================================================================================================="
            )
            a += np.mean(pre[test_users.nonzero()])
            b += np.mean(rec[test_users.nonzero()])
            c += np.mean(dcg[test_users.nonzero()])

            # garbage collection; these numpy arrays are not used hereafter
            del pred
            del test_matrix
            del train_matrix
            gc.collect()
            j = j + 1

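    # NOTE: the divisor 30 presumably equals repeate * sepalate for this configuration.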
    c_pre = c_pre / 30
    c_rec = c_rec / 30
    c_dcg = c_dcg / 30
    print(c_pre)
    np.save("result/" + dataset + "/" + method + "/Precision.npy", precision)
    np.save("result/" + dataset + "/" + method + "/Recall.npy", recall)
    np.save("result/" + dataset + "/" + method + "/nDCG.npy", nDCG)
    np.save(
        "result/movie.review/" + method + "/genre" + set_data + "_set" +
        setting + "/recom.npy", recom)
    np.save(
        "result/movie.review/" + method + "/genre" + set_data + "_set" +
        setting + "/recom2.npy", recom2)
    np.save(
        "result/movie.review/" + method + "/genre" + set_data + "_set" +
        setting + "/c_pre.npy", c_pre)
    np.save(
        "result/movie.review/" + method + "/genre" + set_data + "_set" +
        setting + "/c_rec.npy", c_rec)
    np.save(
        "result/movie.review/" + method + "/genre" + set_data + "_set" +
        setting + "/c_dcg.npy", c_dcg)

    print("Precision AVE : " + str(a / 30))
    print("Recall AVE : " + str(b / 30))
    print("nDCG AVE : " + str(c / 30))
Code example #30
0
def calculate(method):
    global dataset
    precision_all = []
    recall_all = []
    ndcg_all = []
    a = 0.0
    b = 0.0
    c = 0.0

    user_Mu = np.loadtxt("./genre1-10/data/d22/user.csv",
                         delimiter=",").astype(np.int64)
    user_Mv = np.loadtxt("./genre1-10/data/d11/user.csv",
                         delimiter=",").astype(np.int64)
    test_user = np.loadtxt("./genre1-10/data/d21/user.csv",
                           delimiter=",").astype(np.int64)

    item_Mu = np.loadtxt("./genre1-10/data/d22/item.csv",
                         delimiter=",").astype(np.int64)
    item_Mv = np.loadtxt("./genre1-10/data/d11/item.csv",
                         delimiter=",").astype(np.int64)
    test_item = np.loadtxt("./genre1-10/data/d21/item.csv",
                           delimiter=",").astype(np.int64)

    data_Mu = np.loadtxt("./genre1-10/data/d22/data.csv",
                         delimiter=",").astype(np.int64)
    data_Mv = np.loadtxt("./genre1-10/data/d11/data.csv",
                         delimiter=",").astype(np.int64)
    test_data = np.loadtxt("./genre1-10/data/d21/data_s.csv",
                           delimiter=",").astype(np.int64)

    train_index = np.loadtxt("./genre1-10/data/d22/index.csv",
                             delimiter=",").astype(np.int64)
    train_index2 = np.loadtxt("./genre1-10/data/d11/index.csv",
                              delimiter=",").astype(np.int64)
    train_index3 = np.loadtxt("./genre1-10/data/d21/index_s.csv",
                              delimiter=",").astype(np.int64)

    u = np.loadtxt("./u.csv", delimiter=",")
    s = np.loadtxt("./s.csv", delimiter=",")
    vt = np.loadtxt("./vt.csv", delimiter=",")
    s_diag_matrix = np.diag(s)
    seq = (0, 1, 2, 3, 4)
    C_list = list(itertools.permutations(seq))
    vt_new = np.zeros(vt.shape)

    for i in range(120):
        for j in range(5):
            vt_new[j, :] = vt[C_list[i][j], :]

        for _ in range(1):  # single evaluation pass; '_' keeps the outer permutation index i intact
            pred = np.dot(np.dot(u, s_diag_matrix), vt_new)

            test_matrix = makeMatrix(test_data, train_index3, test_user,
                                     test_item)
            test_users = users_in_testdata(3, test_matrix, test_user)

            # calculating precision, recall, and nDCG using "pred"
            pre, rec = ev.precision(3, pred, np.array(test_matrix.todense()),
                                    test_user, test_item)
            dcg = ev.nDCG(3, pred, np.array(test_matrix.todense()), test_user,
                          test_item)

            # print the per-user averages to standard output
            print("Process ID : " + str(os.getpid()))
            print("Repeat : " + str(i + 1))
            #print("K-fold crossvalidation : " + str(j + 1) + "/" + str(sepalate))
            print("Dataset : " + dataset)
            print("Mthod : " + method)
            print("Precision : " + str(np.mean(pre[test_users.nonzero()])))
            precision_all.append(np.mean(pre[test_users.nonzero()]))
            print("Recall : " + str(np.mean(rec[test_users.nonzero()])))
            recall_all.append(np.mean(rec[test_users.nonzero()]))
            print("nDCG : " + str(np.mean(dcg[test_users.nonzero()])))
            ndcg_all.append(np.mean(dcg[test_users.nonzero()]))
            print(
                "================================================================================================="
            )

            gc.collect()
            np.savetxt("precision_all.csv", precision_all, delimiter=",")
            np.savetxt("recall_all.csv", recall_all, delimiter=",")
            np.savetxt("ndcg_all.csv", ndcg_all, delimiter=",")
Code example #31
0
File: testSeg.py Project: pdsfsu/shoulder-prostheses
    lines = 0
    tp = 0
    fp = 0
    tn = 0
    fn = 0

    f = open(file, 'r')
    for line in f:
        lines += 1
        totals = line.split()
        # totals[0] is image name
        tp += int(totals[1])
        fp += int(totals[2])
        tn += int(totals[3])
        fn += int(totals[4])
    f.close()

    precision = evaluation.precision(tp, fp)
    sensitivity = evaluation.sensitivity(tp, fn)
    fmeasure = evaluation.fmeasure(tp, fp, fn)
    dicecoeff = evaluation.dicecoeff(tp, fp, fn)
    jaccardindex = evaluation.jaccardindex(tp, fp, fn)

    r = open('circle_found_results.txt', 'a')
    r.write('File name: ' + file + '\n')
    r.write('Lines: ' + str(lines) + '\n')
    r.write('True Positives: ' + str(tp) + '\n')
    r.write('False Positives: ' + str(fp) + '\n')
    r.write('True Negatives: ' + str(tn) + '\n')
    r.write('False Negatives: ' + str(fn) + '\n')
    r.write('Precision: ' + str(precision) + '\n')
    r.write('Sensitivity: ' + str(sensitivity) + '\n')
    r.write('F-Measure: ' + str(fmeasure) + '\n')
Code example #32
0
File: testSeg.py Project: pdsfsu/shoulder-prostheses
    lines = 0
    tp = 0
    fp = 0
    tn = 0
    fn = 0

    f = open(file, 'r')
    for line in f:
        lines += 1
        totals = line.split()
        tp += int(totals[0])
        fp += int(totals[1])
        tn += int(totals[2])
        fn += int(totals[3])
    f.close()

    precision = evaluation.precision(tp, fp)
    sensitivity = evaluation.sensitivity(tp, fn)
    fmeasure = evaluation.fmeasure(tp, fp, fn)
    dicecoeff = evaluation.dicecoeff(tp, fp, fn)
    jaccardindex = evaluation.jaccardindex(tp, fp, fn)

    r = open('circle_found_results.txt', 'a')
    r.write('File name: ' + file + '\n')
    r.write('Lines: ' + str(lines) + '\n')
    r.write('True Positives: ' + str(tp) + '\n')
    r.write('False Positives: ' + str(fp) + '\n')
    r.write('True Negatives: ' + str(tn) + '\n')
    r.write('False Negatives: ' + str(fn) + '\n')
    r.write('Precision: ' + str(precision) + '\n')
    r.write('Sensitivity: ' + str(sensitivity) + '\n')
    r.write('F-Measure: ' + str(fmeasure) + '\n')
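Examples #31 and #32 lean on the same count-based helpers. A minimal sketch of the standard formulas they presumably implement; the zero-division guards are an assumption:

def precision(tp, fp):
    return tp / float(tp + fp) if (tp + fp) else 0.0

def sensitivity(tp, fn):
    return tp / float(tp + fn) if (tp + fn) else 0.0

def fmeasure(tp, fp, fn):
    # harmonic mean of precision and sensitivity
    return 2.0 * tp / (2 * tp + fp + fn) if (2 * tp + fp + fn) else 0.0

def dicecoeff(tp, fp, fn):
    # identical to the F1 score for binary segmentation counts
    return 2.0 * tp / (2 * tp + fp + fn) if (2 * tp + fp + fn) else 0.0

def jaccardindex(tp, fp, fn):
    return tp / float(tp + fp + fn) if (tp + fp + fn) else 0.0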
Code example #33
0
            line[2], line[3]
        ])
        predict_and_real_std.append([
            med_map[uid][0], med_map[uid][1], med_map[uid][2], line[1],
            line[2], line[3]
        ])
        std_thres = 5
        if std_map[uid][0] > std_thres and 0 < predict_and_real_std[-1][0]:
            predict_and_real_std[-1][0] += 5
        if std_map[uid][1] > std_thres and 0 < predict_and_real_std[-1][1]:
            predict_and_real_std[-1][1] += 5
        if std_map[uid][2] > std_thres and 0 < predict_and_real_std[-1][2]:
            predict_and_real_std[-1][2] += 5

        predict_and_real_0_baseline.append(
            [0, 0, 0, line[1], line[2], line[3]])

        # print(predict_and_real_med[-1])

    print "Median predict: {:.2f}%".format(precision(predict_and_real_med))
    print "Average predict: {:.2f}%".format(precision(predict_and_real_avg))
    print "Median and STD predict: {:.2f}%".format(
        precision(predict_and_real_std))
    print "Zero baseline: {:.2f}%".format(
        precision(predict_and_real_0_baseline))

    cursor.close()
    cnx.close()
except mysql.connector.Error as err:
    print(err.msg)
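The `precision(...)` scorer used above rates (forward, comment, like) predictions. A sketch of a Tianchi-style scoring rule it may implement; the deviation weights, the 0.8 threshold, and the popularity cap below are assumptions based on the contest definition, not taken from this code, and the fraction-vs-percent scale varies between call sites in this collection:

def precision(predict_and_real):
    # Each row: [forward_pred, comment_pred, like_pred, forward_real, comment_real, like_real].
    num, den = 0.0, 0.0
    for fp_, cp_, lp_, fr, cr, lr in predict_and_real:
        dev_f = abs(fp_ - fr) / (fr + 5.0)   # forward deviation
        dev_c = abs(cp_ - cr) / (cr + 3.0)   # comment deviation
        dev_l = abs(lp_ - lr) / (lr + 3.0)   # like deviation
        hit = 1.0 if (1 - 0.5 * dev_f - 0.25 * dev_c - 0.25 * dev_l) > 0.8 else 0.0
        weight = min(fr + cr + lr, 100) + 1  # heavier weight for popular posts (assumed cap)
        num += weight * hit
        den += weight
    return num / den if den else 0.0  # a fraction; some call sites scale to percent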
Code example #34
0
File: train.py Project: zhaodongh/CNQLM_code
def test_point_wise():
    train, dev, test = load(FLAGS.data, filter=FLAGS.clean)  #wiki
    # train, test, dev = load(FLAGS.data, filter=FLAGS.clean) #trec
    q_max_sent_length = max(
        map(lambda x: len(x), train['question'].str.split()))
    a_max_sent_length = max(map(lambda x: len(x), train['answer'].str.split()))
    print(q_max_sent_length)
    print(a_max_sent_length)
    print(len(train))
    print('train question unique:{}'.format(len(train['question'].unique())))
    print('train length', len(train))
    print('test length', len(test))
    print('dev length', len(dev))

    alphabet, embeddings, embeddings_complex = prepare(
        [train, test, dev],
        dim=FLAGS.embedding_dim,
        is_embedding_needed=True,
        fresh=True)
    print(embeddings_complex)
    print('alphabet:', len(alphabet))
    with tf.Graph().as_default():
        with tf.device("/gpu:0"):
            # session_conf = tf.ConfigProto(
            #     allow_soft_placement=FLAGS.allow_soft_placement,
            #     log_device_placement=FLAGS.log_device_placement)

            session_conf = tf.ConfigProto()
            session_conf.allow_soft_placement = FLAGS.allow_soft_placement
            session_conf.log_device_placement = FLAGS.log_device_placement
            session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
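        # NOTE: as in example #28, 'precision' is assumed to be a module-level
        # log-file path in the original project.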
        with sess.as_default(), open(precision, "w") as log:
            log.write(str(FLAGS.__flags) + '\n')
            # train,test,dev = load("trec",filter=True)
            # alphabet,embeddings = prepare([train,test,dev],is_embedding_needed = True)
            cnn = QA_quantum(max_input_left=q_max_sent_length,
                             max_input_right=a_max_sent_length,
                             vocab_size=len(alphabet),
                             embedding_size=FLAGS.embedding_dim,
                             batch_size=FLAGS.batch_size,
                             embeddings=embeddings,
                             embeddings_complex=embeddings_complex,
                             dropout_keep_prob=FLAGS.dropout_keep_prob,
                             filter_sizes=list(
                                 map(int, FLAGS.filter_sizes.split(","))),
                             num_filters=FLAGS.num_filters,
                             l2_reg_lambda=FLAGS.l2_reg_lambda,
                             is_Embedding_Needed=True,
                             trainable=FLAGS.trainable,
                             overlap_needed=FLAGS.overlap_needed,
                             position_needed=FLAGS.position_needed,
                             pooling=FLAGS.pooling,
                             hidden_num=FLAGS.hidden_num,
                             extend_feature_dim=FLAGS.extend_feature_dim)
            cnn.build_graph()
            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            starter_learning_rate = FLAGS.learning_rate
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step, 100, 0.96)
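            # NOTE: the decayed learning_rate computed above is never handed to the
            # optimizer below, so training uses the constant FLAGS.learning_rate.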
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            # optimizer =  tf.train.GradientDescentOptimizer(learning_rate)

            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=20)
            merged = tf.summary.merge_all()
            writer = tf.summary.FileWriter(
                "logs_NNQLM2_embedding_xiangwei_uniform/", sess.graph)
            sess.run(tf.global_variables_initializer())
            map_max = 0.65
            now = int(time.time())
            timeArray = time.localtime(now)
            timeStamp = time.strftime("%Y%m%d%H%M%S", timeArray)
            timeDay = time.strftime("%Y%m%d", timeArray)
            print(timeStamp)
            for i in range(FLAGS.num_epochs):
                d = get_overlap_dict(train,
                                     alphabet,
                                     q_len=q_max_sent_length,
                                     a_len=a_max_sent_length)
                datas = batch_gen_with_point_wise(train,
                                                  alphabet,
                                                  FLAGS.batch_size,
                                                  overlap_dict=d,
                                                  q_len=q_max_sent_length,
                                                  a_len=a_max_sent_length)
                for data in datas:
                    feed_dict = {
                        cnn.question: data[0],
                        cnn.answer: data[1],
                        cnn.input_y: data[2],
                        cnn.q_position: data[3],
                        cnn.a_position: data[4],
                        cnn.overlap: data[5],
                        cnn.q_overlap: data[6],
                        cnn.a_overlap: data[7]
                    }
                    _, step, loss, accuracy, pred, scores, input_y, position = sess.run(
                        [
                            train_op, global_step, cnn.loss, cnn.accuracy,
                            cnn.predictions, cnn.scores, cnn.input_y,
                            cnn.embedding_W_pos
                        ], feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}  ".format(
                        time_str, step, loss, accuracy))
                now = int(time.time())
                timeArray = time.localtime(now)
                timeStamp = time.strftime("%Y%m%d%H%M%S", timeArray)
                timeDay = time.strftime("%Y%m%d", timeArray)
                print(timeStamp)
                predicted = predict(sess, cnn, train, alphabet,
                                    FLAGS.batch_size, q_max_sent_length,
                                    a_max_sent_length)
                predicted_label = np.argmax(predicted, 1)
                map_mrr_train = evaluation.evaluationBypandas(
                    train, predicted[:, -1])
                predicted_test = predict(sess, cnn, test, alphabet,
                                         FLAGS.batch_size, q_max_sent_length,
                                         a_max_sent_length)
                predicted_label = np.argmax(predicted_test, 1)
                p1 = evaluation.precision(test, predicted_test[:, -1])
                map_mrr_test = evaluation.evaluationBypandas(
                    test, predicted_test[:, -1])
                if map_mrr_test[0] > map_max:
                    map_max = map_mrr_test[0]
                    timeStamp = time.strftime("%Y%m%d%H%M%S",
                                              time.localtime(int(time.time())))
                    folder = 'runs/' + timeDay
                    out_dir = folder + '/' + timeStamp + \
                        '__' + FLAGS.data + str(map_mrr_test[0])
                    if not os.path.exists(folder):
                        os.makedirs(folder)
                    #save_path = saver.save(sess, out_dir)
                print("{}:train epoch:map mrr {}".format(i, map_mrr_train))
                print("{}:test epoch:map mrr {}".format(i, map_mrr_test))
                print("{}:test epoch: p1 {}".format(i, p1))
                line1 = " {}:epoch: map_train{}".format(i, map_mrr_train)
                line2 = " {}:epoch: map_test{}".format(i, map_mrr_test)
                line3 = " {}:epoch: p1{}".format(i, p1)
                log.write(line1 + '\n' + line2 + '\n' + line3 + '\n')
                log.flush()
            log.close()
Code example #35
0
print
print "Some documents couldn't be predicted, then they was assigned with None and will not be evaluated"
negative_docs_non_predicted = 0
list_of_true_negative_documents = []
for tn in corpora.negatives[:num_of_documents]:
	if tn.predicted_polarity:
		list_of_true_negative_documents.append(tn)
	else:
		negative_docs_non_predicted += 1

positive_docs_non_predicted = 0
list_of_true_positive_documents = []
for tp in corpora.positives[:num_of_documents]:
	if tp.predicted_polarity:
		list_of_true_positive_documents.append(tp)
	else:
		positive_docs_non_predicted += 1

print "Positive docs non predicted: " + str(positive_docs_non_predicted)
print "Negative docs non predicted: " + str(negative_docs_non_predicted)
print

print "Precision"
print str(eval.precision(len(corpora.positives[:num_of_documents]), list_of_true_negative_documents, ref=0.5) * decimal.Decimal(100)) + ' %'
print "Recall"
print str(eval.recall(len(corpora.positives[:num_of_documents]), list_of_true_positive_documents, ref=0.5) * decimal.Decimal(100)) + ' %'
print "Accuracy"
print str(eval.accuracy(len(corpora.positives), len(corpora.negatives),
				list_of_true_positive_documents + list_of_true_negative_documents, ref=0.5) * decimal.Decimal(100)) + ' %'

Code example #36
0
num_of_documents = args.qDocs

for i, p_doc in enumerate(corpora.positives[:num_of_documents]):
	print "extracting ngrams from positive documents"
	print i
	pp.extract_ngrams(p_doc, stopwords=args.stopwords)
	clear()

for i, n_doc in enumerate(corpora.negatives[:num_of_documents]):
	print "extracting ngrams from negative documents"
	print i
	pp.extract_ngrams(n_doc, stopwords=args.stopwords)
	clear()

print "____________________CLASSIFICATION STAGE____________________"
all_documents = corpora.positives[:num_of_documents] + corpora.negatives[:num_of_documents]
classifier = classification.OhanaBrendan(all_documents)
classifier.rule = args.tags
classifier.term_counting()

print "____________________EVALUATION STAGE____________________"
print args
print
print "Precision"
print str(eval.precision(len(corpora.positives[:num_of_documents]), corpora.negatives[:num_of_documents]) * decimal.Decimal(100)) + ' %'
print "Recall"
print str(eval.recall(len(corpora.positives[:num_of_documents]), corpora.positives[:num_of_documents]) * decimal.Decimal(100)) + ' %'
print "Accuracy"
print str(eval.accuracy(len(corpora.positives), len(corpora.negatives), all_documents) * decimal.Decimal(100)) + ' %'

Code example #37
0
print c_regr.coef_
print l_regr.coef_
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % f_regr.score(X_test, f_y_test))
print('Variance score: %.2f' % c_regr.score(X_test, c_y_test))
print('Variance score: %.2f' % l_regr.score(X_test, l_y_test))

for i in range(0, len(y_predict)):
    if y_predict[i] == 0:
        f_y_test.append(y_test[i][0])
        c_y_test.append(y_test[i][1])
        l_y_test.append(y_test[i][2])
        f_y_predict.append(0)
        c_y_predict.append(0)
        l_y_predict.append(0)

predict_and_real = []
for i in range(0, len(y_predict)):
    predict_and_real.append([
        f_y_predict[i], c_y_predict[i], l_y_predict[i], f_y_test[i],
        c_y_test[i], l_y_test[i]
    ])

predict_and_real0 = []
for i in range(0, len(y_predict)):
    predict_and_real0.append([0, 0, 0, f_y_test[i], c_y_test[i], l_y_test[i]])

print(len(y_predict))
print "Predict: {:.2f}%".format(precision(predict_and_real))
print "Zero baseline: {:.2f}%".format(precision(predict_and_real0))
Code example #38
0
def main(blocking_fn,
         classification_fn,
         threshold,
         minthresh,
         weightvec,
         blocking_attrs,
         func_list,
         save=False):

    # ******** In lab 3, explore different attribute sets for blocking ************

    # The list of attributes to use for blocking (all must occur in the above
    # attribute lists)
    blocking_attrA_list = blocking_attrs
    blocking_attrB_list = blocking_attrs

    # ******** In lab 4, explore different comparison functions for different  ****
    # ********           attributes                                            ****

    # The list of tuples (comparison function, attribute number in record A,
    # attribute number in record B)
    #
    exact_comp_funct_list = [
        (comparison.exact_comp, 1, 1),  # First name
        (comparison.exact_comp, 2, 2),  # Middle name
        (comparison.exact_comp, 3, 3),  # Last name
        (comparison.exact_comp, 8, 8),  # Suburb
        (comparison.exact_comp, 10, 10),  # State
    ]

    approx_comp_funct_list = [
        (func_list[0], 1, 1),  # First name
        (func_list[1], 2, 2),  # Middle name
        (func_list[2], 3, 3),  # Last name
        (func_list[3], 7, 7),  # Address
        (func_list[4], 8, 8),  # Suburb
        (func_list[5], 10, 10),  # State
    ]

    # =============================================================================
    #
    # Step 1: Load the two datasets from CSV files

    start_time = time.time()

    recA_dict = loadDataset.load_data_set(datasetA_name, rec_idA_col, \
                                          attrA_list, headerA_line)
    recB_dict = loadDataset.load_data_set(datasetB_name, rec_idB_col, \
                                          attrB_list, headerB_line)

    # Load data set of true matching pairs
    #
    true_match_set = loadDataset.load_truth_data(truthfile_name)

    loading_time = time.time() - start_time

    # -----------------------------------------------------------------------------
    # Step 2: Block the datasets

    def genericBlock(block_function='none',
                     recA_dict=recA_dict,
                     recB_dict=recB_dict,
                     blocking_attrA_list=blocking_attrA_list,
                     blocking_attrB_list=blocking_attrB_list):

        start_time = time.time()

        # Select one blocking technique
        if block_function == 'none':
            # No blocking (all records in one block)
            #
            resultA = blocking.noBlocking(recA_dict)
            resultB = blocking.noBlocking(recB_dict)

        if block_function == 'attr':
            # Simple attribute-based blocking
            #
            resultA = blocking.simpleBlocking(recA_dict, blocking_attrA_list)
            resultB = blocking.simpleBlocking(recB_dict, blocking_attrB_list)

        if block_function == 'soundex':
            # Phonetic (Soundex) based blocking
            #
            resultA = blocking.phoneticBlocking(recA_dict, blocking_attrA_list)
            resultB = blocking.phoneticBlocking(recB_dict, blocking_attrB_list)

        if block_function == 'slk':
            # Statistical linkage key (SLK-581) based blocking
            #
            fam_name_attr_ind = 3
            giv_name_attr_ind = 1
            dob_attr_ind = 6
            gender_attr_ind = 4

            resultA = blocking.slkBlocking(recA_dict, fam_name_attr_ind, \
                                              giv_name_attr_ind, dob_attr_ind, \
                                              gender_attr_ind)
            resultB = blocking.slkBlocking(recB_dict, fam_name_attr_ind, \
                                              giv_name_attr_ind, dob_attr_ind, \
                                              gender_attr_ind)

        block_time = time.time() - start_time

        # Print blocking statistics
        #
        # blocking.printBlockStatistics(resultA, resultB)

        return resultA, resultB, block_time

    blockA_dict, blockB_dict, blocking_time = genericBlock(
        block_function=blocking_fn)
    # -----------------------------------------------------------------------------
    # Step 3: Compare the candidate pairs

    start_time = time.time()

    sim_vec_dict = comparison.compareBlocks(blockA_dict, blockB_dict, \
                                            recA_dict, recB_dict, \
                                            approx_comp_funct_list)

    comparison_time = time.time() - start_time

    # -----------------------------------------------------------------------------
    # Step 4: Classify the candidate pairs

    def genericClassification(classification_function='exact',
                              sim_vec_dict=sim_vec_dict,
                              sim_threshold=threshold,
                              min_sim_threshold=minthresh,
                              weight_vec=weightvec,
                              true_match_set=true_match_set):
        start_time = time.time()

        if classification_function == 'exact':
            # Exact matching based classification
            class_match_set1, class_nonmatch_set1 = \
                         classification.exactClassify(sim_vec_dict)

        if classification_function == 'simthresh':
            # Similarity threshold based classification
            #
            class_match_set1, class_nonmatch_set1 = \
                        classification.thresholdClassify(sim_vec_dict, sim_threshold)

        if classification_function == 'minsim':
            # Minimum similarity threshold based classification
            #
            class_match_set1, class_nonmatch_set1 = \
                        classification.minThresholdClassify(sim_vec_dict,
                                                            min_sim_threshold)

        if classification_function == 'weightsim':
            # Weighted similarity threshold based classification
            #
            # weight_vec = [1.0] * len(approx_comp_funct_list)

            # Lower weights for middle name and state
            #
            # weight_vec = [2.0, 1.0, 2.0, 2.0, 2.0, 1.0]

            class_match_set1, class_nonmatch_set1 = \
                        classification.weightedSimilarityClassify(sim_vec_dict,
                                                                  weight_vec,
                                                                  sim_threshold)

        if classification_function == 'dt':
            # A supervised decision tree classifier
            #
            class_match_set1, class_nonmatch_set1 = \
                      classification.supervisedMLClassify(sim_vec_dict, true_match_set)

        class_time = time.time() - start_time

        return class_match_set1, class_nonmatch_set1, class_time

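    # NOTE: this rebinding happens after genericClassification's defaults were
    # evaluated, so it only affects the 'threshold' value recorded in the results.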
    threshold = minthresh

    class_match_set, class_nonmatch_set, classification_time = genericClassification(
        classification_fn)

    # -----------------------------------------------------------------------------
    # Step 5: Evaluate the classification

    # Initialise the results dictionary (avoid shadowing the builtin 'dict')
    results = {}

    # Get the number of record pairs compared
    #
    num_comparisons = len(sim_vec_dict)

    # Get the number of total record pairs to compared if no blocking used
    #
    all_comparisons = len(recA_dict) * len(recB_dict)

    # Get the list of identifiers of the compared record pairs
    #
    cand_rec_id_pair_list = sim_vec_dict.keys()

    # Blocking evaluation
    #
    rr = evaluation.reduction_ratio(num_comparisons, all_comparisons)
    pc = evaluation.pairs_completeness(cand_rec_id_pair_list, true_match_set)
    pq = evaluation.pairs_quality(cand_rec_id_pair_list, true_match_set)

    # Linkage evaluation
    #
    linkage_result = evaluation.confusion_matrix(class_match_set,
                                                 class_nonmatch_set,
                                                 true_match_set,
                                                 all_comparisons)

    accuracy = evaluation.accuracy(linkage_result)
    precision = evaluation.precision(linkage_result)
    recall = evaluation.recall(linkage_result)
    fmeasure = evaluation.fmeasure(linkage_result)

    # print('Linkage evaluation:')
    # print('  Accuracy:    %.3f' % (accuracy))
    # print('  Precision:   %.3f' % (precision))
    # print('  Recall:      %.3f' % (recall))
    # print('  F-measure:   %.3f' % (fmeasure))
    # print('')

    linkage_time = loading_time + blocking_time + comparison_time + \
                   classification_time
    # print('Total runtime required for linkage: %.3f sec' % (linkage_time))

    # Export blocking metrics
    results['blocking_fn'] = blocking_fn
    results['classification_fn'] = classification_fn
    results['threshold'] = threshold
    results['min_thresh'] = minthresh
    results['weight_vec'] = weightvec
    results['blocking_attrs'] = blocking_attrs
    results['comp_funcs'] = func_list
    results['num_comparisons'] = num_comparisons
    results['all_comparisons'] = all_comparisons
    # results['cand_rec_id_pair_list'] = cand_rec_id_pair_list
    results['rr'] = rr
    results['pc'] = pc
    results['pq'] = pq
    results['blocking_time'] = blocking_time
    # results['linkage_result'] = linkage_result
    results['accuracy'] = accuracy
    results['precision'] = precision
    results['recall'] = recall
    results['fmeasure'] = fmeasure
    results['linkage_time'] = linkage_time

    # Save results
    if save:
        saveLinkResult.save_linkage_set('final_results.txt', class_match_set)

    # Return results
    return results
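Finally, a hypothetical driver for `main`, sweeping the blocking and classification options named in the code; it assumes the module-level names `main` depends on (datasetA_name, comparison, etc.) are already defined:

if __name__ == "__main__":
    # Exact comparison for every compared attribute, as a conservative default.
    funcs = [comparison.exact_comp] * 6
    for blocking_fn in ["none", "attr", "soundex", "slk"]:
        for classification_fn in ["exact", "simthresh", "minsim", "weightsim", "dt"]:
            res = main(blocking_fn, classification_fn,
                       threshold=0.75, minthresh=0.5,
                       weightvec=[1.0] * 6,
                       blocking_attrs=[1, 3],  # first and last name (assumed indices)
                       func_list=funcs)
            print(res["blocking_fn"], res["classification_fn"],
                  res["precision"], res["recall"], res["fmeasure"])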