Example #1
def predictWithThreshold(datadir, threshold, penalty_type='l2'):
	maxent = LogisticRegression(penalty=penalty_type)
	scores = defaultdict(list)
	for dir in sorted(os.listdir(datadir), reverse=True):
		trainfeatures, trainlabels, vec = feats_and_classify.collect_features(datadir+dir+'/train.conll')
		TrainIndices=np.array(range(len(trainfeatures)))
		features, labels,  vec = feats_and_classify.collect_features(datadir+dir+'/all.conll')
		TestIndices=np.array(range(len(trainfeatures),len(features)))
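		# assumes all.conll lists the train.conll instances first, so indices from len(trainfeatures) onward form the held-out test portion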
#		print('\r'+dir, end="")
#		print(dir)
		TrainX_i = features[TrainIndices]
		Trainy_i = labels[TrainIndices]

		TestX_i = features[TestIndices]
		Testy_i =  labels[TestIndices]

		maxent.fit(TrainX_i,Trainy_i)
#		print('Finished fitting')
		ypred_i, score=pred_for_threshold(maxent,TestX_i,Testy_i, threshold)
#		print('Predicting')

		scores["F1"].append(score[0])
		scores["Recall"].append(score[1])
		scores["Accuracy"].append(score[2])
		scores["Precision"].append(score[3])

	
	#scores = cross_validation.cross_val_score(maxent, features, labels, cv=10)
	print("\n--")

	for key in sorted(scores.keys()):
		currentmetric = np.array(scores[key])
		print("%s : %0.2f (+/- %0.2f)" % (key,currentmetric.mean(), currentmetric.std()))
	print("--")
def main():
	scriptdir = os.path.dirname(os.path.realpath(__file__))
	default_pool = scriptdir+"/../data/cwi_training/cwi_training.txt.lbl.conll"
	parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshold")
	parser.add_argument('--pooled_annotators', help="parsed-and-label input format", default=default_pool)
	parser.add_argument('--threshold_matrix_file', help="location/name of the threshold matrix", default='annotator_threshold_matrix')
	args = parser.parse_args()

	threshold_dict = {}
	threshold_list = []
	current_label_list = []

	features, labels_pooled, vec = feats_and_classify.collect_features(args.pooled_annotators)
	print("total len of f, labels",len(features), len(labels_pooled))


	for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" "):
#	for idx in "01".split(" "):
		current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+idx+".lbl.conll"
		f_current, labels_current, v_current = feats_and_classify.collect_features(current_single_ann)
		print(idx,"len of f, labels",len(f_current), len(labels_current) )

	for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" "):
#	for idx in "01".split(" "):
		current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+idx+".lbl.conll"

		_, labels_current, _ = feats_and_classify.collect_features(current_single_ann)
		current_label_list.append(labels_current)

		maxent, thresholds=getBestThreshold(features, labels_pooled,labels_current)
		threshold_list.extend(thresholds)
		print(thresholds)
		predictAcrossThresholds(features, labels_pooled,labels_current, maxent, thresholds, average=True, median=True)


	pooled_score_dict_ave = defaultdict(list)
	pooled_score_dict_med = defaultdict(list)
	count=0
	for labels_current in current_label_list:
		count+=1
		print('ANNOTATOR COUNT '+str(count))
		score_dict_ave, score_dict_med = predictAcrossThresholds(features, labels_pooled,labels_current, maxent, np.array(threshold_list), average=True, median=True)
		for k in score_dict_ave:
			pooled_score_dict_ave[k].append(score_dict_ave[k][0])
			pooled_score_dict_med[k].append(score_dict_med[k][0])
	#finding dimensions of matrix to print out
	cols=20
	rows=len(pooled_score_dict_ave[k])//20  # integer division so np.ndarray gets an int shape
	mat=np.ndarray(shape=(rows,cols), buffer=np.array(pooled_score_dict_ave[k]), dtype=float)
	print(mat)
	pickle.dump( mat, open( args.threshold_matrix_file+"_ave.p", "wb" ) )
	mat=np.ndarray(shape=(rows,cols), buffer=np.array(pooled_score_dict_med[k]), dtype=float)	
	print(mat)
	pickle.dump( mat, open( args.threshold_matrix_file+"_med.p", "wb" ) )

	for k in pooled_score_dict_ave:
		print(k,":",np.array(pooled_score_dict_ave[k]).mean(),":",pooled_score_dict_ave[k])
		print(k,":",np.median(pooled_score_dict_med[k]),":",pooled_score_dict_med[k])

	sys.exit(0)
def main():
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshold")
    parser.add_argument('--threshold',type=float,default=0.5)
    parser.add_argument('--annotator',type=str,default="03")
    parser.add_argument('--penalty',type=str,choices=["l1","l2"],default="l1")


    args = parser.parse_args()
    current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+args.annotator+".lbl.conll"
    testfile = scriptdir+"/../data/cwi_testing/cwi_testing.txt.lbl.conll"
    X__dict_train, y_train, v_train = feats_and_classify.collect_features(current_single_ann,vectorize=False)
    X_dict_test, y_test, v_test = feats_and_classify.collect_features(testfile,vectorize=False)
    # concatenate train and test dicts so one DictVectorizer fit gives both the same feature space
    featdicts = X__dict_train + X_dict_test
    vect = DictVectorizer()
    X = vect.fit_transform(featdicts).toarray()
    X_train=X[:len(y_train)]
    X_test=X[len(y_train):]

    maxent = LogisticRegression(penalty=args.penalty)
    maxent.fit(X_train,y_train)
    y_pred_proba = maxent.predict_proba(X_test)
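    # column 1 of predict_proba is the probability of the positive class; the threshold turns it into hard "1"/"0" labels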
    ypred_i=["1" if pair[1]>=args.threshold else "0" for pair in y_pred_proba]
    fout = open(args.annotator+".pred",mode="w")
    print("\n".join(ypred_i),file=fout)
    fout.close()
    sys.exit(0)
def main():
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    default_pool = scriptdir+"/../data/cwi_training/cwi_training.txt.lbl.conll"
    parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshold")
    parser.add_argument('--iterations',type=int,default=5)

    args = parser.parse_args()


    all_feats = []
    all_labels = defaultdict(list)
    scores = defaultdict(list)




    for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" "):
#    for idx in "01".split(" "):
        current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+idx+".lbl.conll"
        f_current, labels_current, v_current = feats_and_classify.collect_features(current_single_ann,vectorize=False,generateFeatures=False)
        for instance_index,l in enumerate(labels_current):
            all_labels[instance_index].append(l)
    current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_01.lbl.conll"
    feats, labels_current, v_current = feats_and_classify.collect_features(current_single_ann,vectorize=True,generateFeatures=True)

    for it in range(args.iterations):
        for TrainIndices, TestIndices in cross_validation.KFold(n=feats.shape[0], n_folds=10, shuffle=True, random_state=None):
            maxent = LogisticRegression(penalty='l2')

            TrainX_i = feats[TrainIndices]
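            # each label (train and test) is drawn from one randomly chosen annotator (index 0-19) per instance, so labels vary between folds and iterations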
            Trainy_i = [all_labels[x][random.randrange(0,20)] for x in TrainIndices]

            TestX_i = feats[TestIndices]
            Testy_i =  [all_labels[x][random.randrange(0,20)] for x in TestIndices]

            maxent.fit(TrainX_i,Trainy_i)
            ypred_i = maxent.predict(TestX_i)

            # sklearn metrics expect (y_true, y_pred); keep the gold labels first so precision and recall are not swapped
            acc = accuracy_score(Testy_i, ypred_i)
            pre = precision_score(Testy_i, ypred_i)
            rec = recall_score(Testy_i, ypred_i)
            # shared task uses f1 of *accuracy* and recall!
            f1 = 2 * acc * rec / (acc + rec)

            scores["Accuracy"].append(acc)
            scores["F1"].append(f1)
            scores["Precision"].append(pre)
            scores["Recall"].append(rec)
        #scores = cross_validation.cross_val_score(maxent, features, labels, cv=10)
        print("--")

    for key in sorted(scores.keys()):
        currentmetric = np.array(scores[key])
        print("%s : %0.2f (+/- %0.2f)" % (key,currentmetric.mean(), currentmetric.std()))
    print("--")

    sys.exit(0)
Example #5
def getBestThreshold(datadir, penalty_type='l2'):
    maxent = LogisticRegression(penalty=penalty_type)
    scores = {"F1": [], "Recall": [], "Accuracy": [], "Precision": []}
    thresholds = []

    print('Finding best thresholds...')
    for dir in os.listdir(datadir):
        trainfeatures, trainlabels, vec = feats_and_classify.collect_features(
            datadir + dir + '/train.conll')
        TrainIndices = np.array(range(len(trainfeatures)))
        features, labels, vec = feats_and_classify.collect_features(
            datadir + dir + '/all.conll')
        TestIndices = np.array(range(len(trainfeatures), len(features)))
        print('\r' + dir, end="")
        #		print(dir)
        TrainX_i = features[TrainIndices]
        Trainy_i = labels[TrainIndices]

        TestX_i = features[TestIndices]
        Testy_i = labels[TestIndices]

        maxent.fit(TrainX_i, Trainy_i)
        #		print('Finished fitting')
        #get prediction
        thresh_i, ypred_i, score = optimize_threshold(maxent, TestX_i, Testy_i)
        #		print('Optimising')
        thresholds.append(thresh_i)

        scores["F1"].append(score[0])
        scores["Recall"].append(score[1])
        scores["Accuracy"].append(score[2])
        scores["Precision"].append(score[3])

    #scores = cross_validation.cross_val_score(maxent, features, labels, cv=10)
    print("\n--")

    for key in sorted(scores.keys()):
        currentmetric = np.array(scores[key])
        print("%s : %0.2f (+/- %0.2f)" %
              (key, currentmetric.mean(), currentmetric.std()))
    print("--")

    return np.array(thresholds)
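# optimize_threshold is referenced above but not shown in this example. A minimal sketch,
# assuming binary 0/1 labels: sweep candidate thresholds over the positive-class probability
# and keep the one with the highest F1, returning (threshold, predictions, (F1, Recall, Accuracy, Precision)).
import numpy as np
from sklearn.metrics import f1_score, recall_score, accuracy_score, precision_score

def optimize_threshold(maxent, X, y):
    proba = maxent.predict_proba(X)[:, 1]
    best_t, best_pred, best_score = 0.5, None, (-1.0, 0.0, 0.0, 0.0)
    for t in np.arange(0.05, 1.0, 0.05):
        ypred = (proba >= t).astype(int)
        score = (f1_score(y, ypred), recall_score(y, ypred),
                 accuracy_score(y, ypred), precision_score(y, ypred))
        if score[0] > best_score[0]:
            best_t, best_pred, best_score = t, ypred, score
    return best_t, best_pred, best_score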
Example #6
def getBestThreshold(datadir, penalty_type='l2'):
	maxent = LogisticRegression(penalty=penalty_type)
	scores = {"F1":[], "Recall":[], "Accuracy":[], "Precision":[]}
	thresholds=[]

	print('Finding best thresholds...')
	for dir in os.listdir(datadir):
		trainfeatures, trainlabels, vec = feats_and_classify.collect_features(datadir+dir+'/train.conll')
		TrainIndices=np.array(range(len(trainfeatures)))
		features, labels,  vec = feats_and_classify.collect_features(datadir+dir+'/all.conll')
		TestIndices=np.array(range(len(trainfeatures),len(features)))
		print('\r'+dir, end="")
#		print(dir)
		TrainX_i = features[TrainIndices]
		Trainy_i = labels[TrainIndices]

		TestX_i = features[TestIndices]
		Testy_i =  labels[TestIndices]

		maxent.fit(TrainX_i,Trainy_i)
#		print('Finished fitting')
		#get prediction
		thresh_i, ypred_i, score=optimize_threshold(maxent,TestX_i,Testy_i)
#		print('Optimising')
		thresholds.append(thresh_i)

		scores["F1"].append(score[0])
		scores["Recall"].append(score[1])
		scores["Accuracy"].append(score[2])
		scores["Precision"].append(score[3])
	
	#scores = cross_validation.cross_val_score(maxent, features, labels, cv=10)
	print("\n--")

	for key in sorted(scores.keys()):
		currentmetric = np.array(scores[key])
		print("%s : %0.2f (+/- %0.2f)" % (key,currentmetric.mean(), currentmetric.std()))
	print("--")
	
	return np.array(thresholds)
Example #7
def main():
	scriptdir = os.path.dirname(os.path.realpath(__file__))
	defaultdata = scriptdir+"/../data/cwi_training/cwi_training.txt.lbl.conll"
	parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshold")
	parser.add_argument('--train', help="parsed-and-label input format", default=defaultdata)
	args = parser.parse_args()

	features, labels, vec = feats_and_classify.collect_features(args.train)
	maxent, thresholds=getBestThreshold(features, labels)
	print(thresholds)
	predictAcrossThresholds(features, labels, maxent, thresholds, average=True, median=True)

	sys.exit(0)
def main():
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    default_pool = scriptdir+"/../data/cwi_training/cwi_training.txt.lbl.conll"
    parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshold")
    args = parser.parse_args()


    all_feats = []
    all_labels = []
    scores = defaultdict(list)

    for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" "):
#    for idx in "01".split(" "):
        current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+idx+".lbl.conll"
        f_current, labels_current, v_current = feats_and_classify.collect_features(current_single_ann,vectorize=False)
        all_feats.extend(f_current)
        all_labels.extend(labels_current)

    feats = DictVectorizer().fit_transform(all_feats).toarray()
    all_labels = np.asarray(all_labels)
    for TrainIndices, TestIndices in cross_validation.KFold(n=feats.shape[0], n_folds=10, shuffle=True, random_state=None):
        maxent = LogisticRegression(penalty='l2')
        TrainX_i = feats[TrainIndices]
        Trainy_i = all_labels[TrainIndices]

        TestX_i = feats[TestIndices]
        Testy_i =  all_labels[TestIndices]

        maxent.fit(TrainX_i,Trainy_i)
        ypred_i = maxent.predict(TestX_i)

        # sklearn metrics expect (y_true, y_pred); keep the gold labels first so precision and recall are not swapped
        acc = accuracy_score(Testy_i, ypred_i)
        pre = precision_score(Testy_i, ypred_i)
        rec = recall_score(Testy_i, ypred_i)
        # shared task uses f1 of *accuracy* and recall!
        f1 = 2 * acc * rec / (acc + rec)

        scores["Accuracy"].append(acc)
        scores["F1"].append(f1)
        scores["Precision"].append(pre)
        scores["Recall"].append(rec)
    #scores = cross_validation.cross_val_score(maxent, features, labels, cv=10)
    print("--")

    for key in sorted(scores.keys()):
        currentmetric = np.array(scores[key])
        print("%s : %0.2f (+/- %0.2f)" % (key,currentmetric.mean(), currentmetric.std()))
    print("--")

    sys.exit(0)
Example #9
def predictWithThreshold(datadir, threshold, penalty_type='l2'):
    maxent = LogisticRegression(penalty=penalty_type)
    scores = defaultdict(list)
    for dir in sorted(os.listdir(datadir), reverse=True):
        trainfeatures, trainlabels, vec = feats_and_classify.collect_features(
            datadir + dir + '/train.conll')
        TrainIndices = np.array(range(len(trainfeatures)))
        features, labels, vec = feats_and_classify.collect_features(
            datadir + dir + '/all.conll')
        TestIndices = np.array(range(len(trainfeatures), len(features)))
        #		print('\r'+dir, end="")
        #		print(dir)
        TrainX_i = features[TrainIndices]
        Trainy_i = labels[TrainIndices]

        TestX_i = features[TestIndices]
        Testy_i = labels[TestIndices]

        maxent.fit(TrainX_i, Trainy_i)
        #		print('Finished fitting')
        ypred_i, score = pred_for_threshold(maxent, TestX_i, Testy_i,
                                            threshold)
        #		print('Predicting')

        scores["F1"].append(score[0])
        scores["Recall"].append(score[1])
        scores["Accuracy"].append(score[2])
        scores["Precision"].append(score[3])

    #scores = cross_validation.cross_val_score(maxent, features, labels, cv=10)
    print("\n--")

    for key in sorted(scores.keys()):
        currentmetric = np.array(scores[key])
        print("%s : %0.2f (+/- %0.2f)" %
              (key, currentmetric.mean(), currentmetric.std()))
    print("--")
def main():
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    parser = argparse.ArgumentParser(
        description="Skeleton for features and classifier for CWI-2016--optimisation of threshold")
    parser.add_argument('--threshold', type=float, default=0.5)
    parser.add_argument('--annotator', type=str, default="03")
    parser.add_argument('--penalty',
                        type=str,
                        choices=["l1", "l2"],
                        default="l1")

    args = parser.parse_args()
    current_single_ann = scriptdir + "/../data/cwi_training/cwi_training_" + args.annotator + ".lbl.conll"
    testfile = scriptdir + "/../data/cwi_testing/cwi_testing.txt.lbl.conll"
    X__dict_train, y_train, v_train = feats_and_classify.collect_features(
        current_single_ann, vectorize=False)
    X_dict_test, y_test, v_test = feats_and_classify.collect_features(
        testfile, vectorize=False)
    featdicts = X__dict_train + X_dict_test
    vect = DictVectorizer()
    X = vect.fit_transform(featdicts).toarray()
    X_train = X[:len(y_train)]
    X_test = X[len(y_train):]

    maxent = LogisticRegression(penalty=args.penalty)
    maxent.fit(X_train, y_train)
    y_pred_proba = maxent.predict_proba(X_test)
    ypred_i = [
        "1" if pair[1] >= args.threshold else "0" for pair in y_pred_proba
    ]
    fout = open(args.annotator + ".pred", mode="w")
    print("\n".join(ypred_i), file=fout)
    fout.close()
    sys.exit(0)
Example #11
def main():
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    default_data_parsed = scriptdir + "/../data/cwi_training/cwi_training.txt.lbl.conll"
    default_data_allannotations = scriptdir + "/../data/cwi_training/cwi_training_allannotations.txt"
    parser = argparse.ArgumentParser(
        description="Skeleton for features and classifier for CWI-2016--optimisation of threshold")
    parser.add_argument('--all_annotations_file',
                        help="parsed-and-label input format",
                        default=default_data_allannotations)
    parser.add_argument('--parsed_file',
                        help="parsed-and-label input format",
                        default=default_data_parsed)
    parser.add_argument('--threshold_matrix_file',
                        help="location/name of the threshold matrix",
                        default='annotator_threshold_matrix')
    parser.add_argument('--regularization',
                        help="regularizer, may be l1 or l2",
                        default='l2')
    args = parser.parse_args()

    print(args.regularization)

    # f1_matrix holds for every training annotator: the list of tuples of
    # avg/med f1_row based on avg/med threshold
    f1_matrix = []
    # holds for every training annotator: the list of tuples of avg/med threshold
    t_matrix = []
    current_label_list = []

    f1_final = []  # holds 4-tuples of avgs over (f1_avg_avg, f1_avg_med, f1_med_avg, f1_med_med) f.e. tr
    t_final = []  # holds 4-tuples of (t_avg_avg, t_avg_med, t_med_avg, t_med_med) f.e. tr

    X, _, v = feats_and_classify.collect_features(args.parsed_file)
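    # features are computed once from the parsed file; only the label assignments change across vote thresholds and test annotators below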

    # train for every annotator...
    #for vote_threshold in range(1,21):
    for vote_threshold in range(3, 10):
        #current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+vote_threshold+".lbl.conll"
        y_current_tr = feats_and_classify.collect_labels_positive_threshold(
            args.all_annotations_file, vote_threshold)
        print(
            "Training, setting positive labels for examples with at least {} positive votes. "
            .format(vote_threshold))
        print("Training data has {} positive labels out of {}".format(
            sum(y_current_tr), len(y_current_tr)))
        f1_row = []  # holds 4-tuples of (f1_avg_avg, f1_avg_med, f1_med_avg, f1_med_med) f.e. tr/te
        t_row = []  # holds 2-tuples of (t_avg, t_med) f.e. tr/te
        f1_matrix.append(f1_row)
        t_matrix.append(t_row)

        # optimize t for every annotator (except training annotator), yields avg/med t
        #for idx in "03 04 05".split(" "):
        #for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" "):
        # 02, 09, 17 are the annotators with the least/average/most positive votes
        for idx in "02 09 17".split(" "):
            print("  Testing on annotator " + idx)
            #current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+idx+".lbl.conll"

            #y_current_te = feats_and_classify.collect_labels(current_single_ann)
            y_current_te = feats_and_classify.collect_labels(
                args.all_annotations_file,
                int(idx) - 1)
            current_label_list.append(y_current_te)

            maxent, thresholds = getBestThreshold(
                X,
                y_current_tr,
                y_current_te,
                regularization=args.regularization)
            t_avg = np.average(thresholds)
            t_med = np.median(thresholds)
            t_row.append((t_avg, t_med))

        # calculate avg of avg t's, avg of med t's, ... for the current training annotator
        t_avg_avg = np.average([t[0] for t in t_row])
        t_avg_med = np.average([t[1] for t in t_row])
        t_med_avg = np.median([t[0] for t in t_row])
        t_med_med = np.median([t[1] for t in t_row])
        t_final.append((t_avg_avg, t_avg_med, t_med_avg, t_med_med))

        print(
            "Computed optimal t's... Now running a new phase of CV experiments with these t's on test annotators."
        )

        #maxent = LogisticRegression(penalty=args.regularization)
        #maxent.fit(X, y_current_tr)
        #for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" "):
        # 02, 09, 17 are the annotators with the least/average/most positive votes
        for idx in "02 09 17".split(" "):
            f1_avg_avg = 0
            f1_med_avg = 0
            f1_avg_med = 0
            f1_med_med = 0

            print("Testing globally optimal t's for annotator {}".format(idx))
            #y_current_te = feats_and_classify.collect_labels(current_single_ann)
            y_current_te = feats_and_classify.collect_labels(
                args.all_annotations_file,
                int(idx) - 1)
            #f1_avg_avg = cvWithThreshold(X, y_current_tr, y_current_te, t_avg_avg, args.regularization)['F1'][0]
            f1_avg_med = cvWithThreshold(X, y_current_tr, y_current_te,
                                         t_avg_med,
                                         args.regularization)['F1'][0]
            #f1_med_avg = cvWithThreshold(X, y_current_tr, y_current_te, t_med_avg, args.regularization)['F1'][0]
            #f1_med_med = cvWithThreshold(X, y_current_tr, y_current_te, t_med_med, args.regularization)['F1'][0]

            f1_row.append((f1_avg_avg, f1_avg_med, f1_med_avg, f1_med_med))

        f1_final.append(tuple(map(np.average, zip(*f1_row))))
        print(tuple(map(np.average, zip(*f1_row))))
    print(f1_final)
    # get the index (NB: array index!) of the max avg/med F1 (i.e. computed on avg/med threshold)
    best_vote_threshold_avg_avg = np.argmax([f1[0] for f1 in f1_final])
    best_vote_threshold_avg_med = np.argmax([f1[1] for f1 in f1_final])
    best_vote_threshold_med_avg = np.argmax([f1[2] for f1 in f1_final])
    best_vote_threshold_med_med = np.argmax([f1[3] for f1 in f1_final])

    print(t_final)

    sys.exit(0)
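# cvWithThreshold is called above but not defined in this listing. A minimal sketch, assuming the
# module-level imports used elsewhere here (numpy as np, defaultdict, cross_validation,
# LogisticRegression): 10-fold CV that trains on the training annotator's labels, applies the fixed
# decision threshold, scores against the test annotator's labels, and returns {metric: [mean, std]}
# so that cvWithThreshold(...)['F1'][0] is the mean F1.
from sklearn.metrics import f1_score, recall_score, accuracy_score, precision_score

def cvWithThreshold(X, y_tr, y_te, threshold, regularization='l2'):
    y_tr, y_te = np.asarray(y_tr), np.asarray(y_te)
    scores = defaultdict(list)
    for TrainIndices, TestIndices in cross_validation.KFold(n=X.shape[0], n_folds=10, shuffle=True):
        clf = LogisticRegression(penalty=regularization)
        clf.fit(X[TrainIndices], y_tr[TrainIndices])
        proba = clf.predict_proba(X[TestIndices])[:, 1]
        ypred = (proba >= threshold).astype(int)
        scores["F1"].append(f1_score(y_te[TestIndices], ypred))
        scores["Recall"].append(recall_score(y_te[TestIndices], ypred))
        scores["Accuracy"].append(accuracy_score(y_te[TestIndices], ypred))
        scores["Precision"].append(precision_score(y_te[TestIndices], ypred))
    return {key: [np.mean(vals), np.std(vals)] for key, vals in scores.items()}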
def main():
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    default_data_parsed = scriptdir+"/../data/cwi_training/cwi_training.txt.lbl.conll"
    default_data_allannotations = scriptdir+"/../data/cwi_training/cwi_training_allannotations.txt"
    parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshold")
    parser.add_argument('--all_annotations_file', help="parsed-and-label input format", default=default_data_allannotations)
    parser.add_argument('--parsed_file', help="parsed-and-label input format", default=default_data_parsed)
    parser.add_argument('--threshold_matrix_file', help="location/name of the threshold matrix", default='annotator_threshold_matrix')
    parser.add_argument('--regularization', help="regularizer, may be l1 or l2", default='l2')
    args = parser.parse_args()

    print(args.regularization) 

    # f1_matrix holds for every training annotator: the list of tuples of 
    # avg/med f1_row based on avg/med threshold
    f1_matrix = []
    # holds for every training annotator: the list of tuples of avg/med threshold
    t_matrix = []
    current_label_list = []
    
    f1_final = [] # holds 4-tuples of avgs over (f1_avg_avg, f1_avg_med, f1_med_avg, f1_med_med) f.e. tr 
    t_final  = [] # holds 4-tuples of (t_avg_avg, t_avg_med, t_med_avg, t_med_med) f.e. tr

    X, _, v = feats_and_classify.collect_features(args.parsed_file)

    # train for every annotator...
    #for vote_threshold in range(1,21):
    for vote_threshold in range(3,10):
        #current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+vote_threshold+".lbl.conll"
        y_current_tr = feats_and_classify.collect_labels_positive_threshold(args.all_annotations_file, vote_threshold)
        print("Training, setting positive labels for examples with at least {} positive votes. ".format(vote_threshold))
        print("Training data has {} positive labels out of {}".format(sum(y_current_tr), len(y_current_tr)))
        f1_row = [] # holds 4-tuples of (f1_avg_avg, f1_avg_med, f1_med_avg, f1_med_med) f.e. tr/te
        t_row  = [] # holds 2-tuples of (t_avg, t_med) f.e. tr/te
        f1_matrix.append(f1_row)
        t_matrix.append(t_row)
        
        # optimize t for every annotator (except training annotator), yields avg/med t 
        #for idx in "03 04 05".split(" "):
        #for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" "):
        # 02, 09, 17 are the annotators with the least/average/most positive votes
        for idx in "02 09 17".split(" "):
            print("  Testing on annotator "+idx)
            #current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+idx+".lbl.conll"

            #y_current_te = feats_and_classify.collect_labels(current_single_ann)
            y_current_te = feats_and_classify.collect_labels(args.all_annotations_file, int(idx)-1)
            current_label_list.append(y_current_te)

            maxent, thresholds=getBestThreshold(X, y_current_tr, y_current_te, regularization=args.regularization)
            t_avg = np.average(thresholds)
            t_med = np.median(thresholds)
            t_row.append((t_avg, t_med))
        
        # calculate avg of avg t's, avg of med t's, ... for the current training annotator
        t_avg_avg = np.average([t[0] for t in t_row]) 
        t_avg_med = np.average([t[1] for t in t_row]) 
        t_med_avg =  np.median([t[0] for t in t_row]) 
        t_med_med =  np.median([t[1] for t in t_row]) 
        t_final.append((t_avg_avg, t_avg_med, t_med_avg, t_med_med))

        print("Computed optimal t's... Now running a new phase of CV experiments with these t's on test annotators.")
 
        #maxent = LogisticRegression(penalty=args.regularization)
        #maxent.fit(X, y_current_tr) 
        #for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" "):
        # 02, 09, 17 are the annotators with the least/average/most positive votes
        for idx in "02 09 17".split(" "):
            f1_avg_avg = 0
            f1_med_avg = 0
            f1_avg_med = 0
            f1_med_med = 0
 
            print("Testing globally optimal t's for annotator {}".format(idx))
            #y_current_te = feats_and_classify.collect_labels(current_single_ann)
            y_current_te = feats_and_classify.collect_labels(args.all_annotations_file, int(idx)-1)
            #f1_avg_avg = cvWithThreshold(X, y_current_tr, y_current_te, t_avg_avg, args.regularization)['F1'][0]
            f1_avg_med = cvWithThreshold(X, y_current_tr, y_current_te, t_avg_med, args.regularization)['F1'][0]
            #f1_med_avg = cvWithThreshold(X, y_current_tr, y_current_te, t_med_avg, args.regularization)['F1'][0]
            #f1_med_med = cvWithThreshold(X, y_current_tr, y_current_te, t_med_med, args.regularization)['F1'][0]

           
            f1_row.append((f1_avg_avg, f1_avg_med, f1_med_avg, f1_med_med))

        f1_final.append(tuple(map(np.average, zip(*f1_row))))
        print(tuple(map(np.average, zip(*f1_row))))
    print(f1_final)
    # get the index (NB: array index!) of the max avg/med F1 (i.e. computed on avg/med threshold)
    best_vote_threshold_avg_avg = np.argmax([f1[0] for f1 in f1_final])
    best_vote_threshold_avg_med = np.argmax([f1[1] for f1 in f1_final])
    best_vote_threshold_med_avg = np.argmax([f1[2] for f1 in f1_final])
    best_vote_threshold_med_med = np.argmax([f1[3] for f1 in f1_final])

    print(t_final)

    sys.exit(0)
def main():
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    default_pool = scriptdir + "/../data/cwi_training/cwi_training.txt.lbl.conll"
    parser = argparse.ArgumentParser(
        description="Skeleton for features and classifier for CWI-2016--optimisation of threshold")
    parser.add_argument('--pooled_annotators',
                        help="parsed-and-label input format",
                        default=default_pool)
    parser.add_argument('--threshold_matrix_file',
                        help="location/name of the threshold matrix",
                        default='annotator_threshold_matrix')
    args = parser.parse_args()

    threshold_dict = {}
    threshold_list = []
    current_label_list = []

    features, labels_pooled, vec = feats_and_classify.collect_features(
        args.pooled_annotators)
    print("total len of f, labels", len(features), len(labels_pooled))

    for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(
            " "):
        #	for idx in "01".split(" "):
        current_single_ann = scriptdir + "/../data/cwi_training/cwi_training_" + idx + ".lbl.conll"
        f_current, labels_current, v_current = feats_and_classify.collect_features(
            current_single_ann)
        print(idx, "len of f, labels", len(f_current), len(labels_current))

    for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(
            " "):
        #	for idx in "01".split(" "):
        current_single_ann = scriptdir + "/../data/cwi_training/cwi_training_" + idx + ".lbl.conll"

        _, labels_current, _ = feats_and_classify.collect_features(
            current_single_ann)
        current_label_list.append(labels_current)

        maxent, thresholds = getBestThreshold(features, labels_pooled,
                                              labels_current)
        threshold_list.extend(thresholds)
        print(thresholds)
        predictAcrossThresholds(features,
                                labels_pooled,
                                labels_current,
                                maxent,
                                thresholds,
                                average=True,
                                median=True)

    pooled_score_dict_ave = defaultdict(list)
    pooled_score_dict_med = defaultdict(list)
    count = 0
    for labels_current in current_label_list:
        count += 1
        print('ANNOTATOR COUNT ' + str(count))
        score_dict_ave, score_dict_med = predictAcrossThresholds(
            features,
            labels_pooled,
            labels_current,
            maxent,
            np.array(threshold_list),
            average=True,
            median=True)
        for k in score_dict_ave:
            pooled_score_dict_ave[k].append(score_dict_ave[k][0])
            pooled_score_dict_med[k].append(score_dict_med[k][0])
    #finding dimensions of matrix to print out
    cols = 20
    rows = len(pooled_score_dict_ave[k]) // 20  # integer division so np.ndarray gets an int shape
    mat = np.ndarray(shape=(rows, cols),
                     buffer=np.array(pooled_score_dict_ave[k]),
                     dtype=float)
    print(mat)
    pickle.dump(mat, open(args.threshold_matrix_file + "_ave.p", "wb"))
    mat = np.ndarray(shape=(rows, cols),
                     buffer=np.array(pooled_score_dict_med[k]),
                     dtype=float)
    print(mat)
    pickle.dump(mat, open(args.threshold_matrix_file + "_med.p", "wb"))

    for k in pooled_score_dict_ave:
        print(k, ":",
              np.array(pooled_score_dict_ave[k]).mean(), ":",
              pooled_score_dict_ave[k])
        print(k, ":", np.median(pooled_score_dict_med[k]), ":",
              pooled_score_dict_med[k])

    sys.exit(0)