# Imports used below (assumed to belong at the top of the module if they are
# not already there; helpers such as process_document, process_mail, parse_file,
# anno, summary_eval, lda_process, etc. are defined elsewhere in this repository).
import os
import shutil
import numpy
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, train_test_split

def main():
    # Train the F-measure regressor on the BC3 corpus, then use it to pick the
    # highest-scoring sentence of a new document as its one-sentence summary.
    f_clf = bc3_eval()
    print('over')
    title = 'david_chalmers_how_do_you_explain_consciousness'
    #process_document(title,sen2=1,exist=0)
    vec, important, title_sim = process_document(title, sen2=1)
    topic_similarity = lda_process(vec)
    phrase_list, vec_phrase_index = phrase_extraction(vec)
    word_list = word_score_calculate(phrase_list)
    vec_phrase_score = important_word(vec, word_list, phrase_list, vec_phrase_index)
    cue_word_score = cue_word(vec)
    # Score every sentence with the trained regressor and keep the best one.
    maxs = 0
    maxi = 0
    for j in range(len(vec)):
        # predict() expects a 2-D array: one row of five features per sentence.
        tmp = f_clf.predict([[important[j], title_sim[j], topic_similarity[j],
                              vec_phrase_score[j], cue_word_score[j]]])[0]
        if tmp >= maxs:
            maxs = tmp
            maxi = j
    print(vec[maxi].sentence)
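
# bc3_eval(): train three SVR regressors (recall, precision, F-measure) on the
# first `train` threads of the BC3 email corpus. Each sentence is described by
# five features (importance, title similarity, LDA topic similarity, key-phrase
# score, cue-word score); its ROUGE scores against the annotated summaries are
# the regression targets. The fitted F-measure model is returned for main().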
def bc3_eval():
    corpus = 'bc3/bc3corpus.1.0/corpus.xml'
    annotation = 'bc3/bc3corpus.1.0/annotation.xml'
    mails = parse_file(corpus)
    mails = parse_anno(annotation, mails)
    sample_vector = []
    target_vector = []
    precision_vector = []
    recall_vector = []
    F_measure_vector = []
    # Running totals used later to normalise the first three features.
    imp = 0
    ti_s = 0
    to_s = 0
    train = 20      # number of threads used for training
    index = 0
    for i in mails:
        if index >= train:
            break
        index += 1
        vec, important, title_sim = process_mail(i)
        topic_similarity = lda_process(vec)
        phrase_list, vec_phrase_index = phrase_extraction(vec)
        word_list = word_score_calculate(phrase_list)
        vec_phrase_score = important_word(vec, word_list, phrase_list, vec_phrase_index)
        cue_word_score = cue_word(vec)
        standard_summary = [anno(i)]
        standard_name = [[]]
        for j in range(len(standard_summary[0])):
            standard_name[0].append('PythonROUGE/' + i.name + '/' + str(j) + '_standard.txt')
        newpath = 'PythonROUGE/' + i.name
        if not os.path.exists(newpath):
            os.makedirs(newpath)
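        # Evaluate every sentence on its own: its ROUGE recall, precision and
        # F-measure against the gold summaries become the regression targets
        # for the corresponding feature row in sample_vector.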
        for j in range(len(vec)):
            produce_name = ['PythonROUGE/' + i.name + '/' + str(j) + '.txt']
            produce_summary = [[vec[j].sentence]]
            sample_vector.append([important[j], title_sim[j], topic_similarity[j],
                                  vec_phrase_score[j], cue_word_score[j]])
            imp += important[j]
            ti_s += title_sim[j]
            to_s += topic_similarity[j]
            recall, precision, F_measure = summary_eval(standard_summary, standard_name,
                                                        produce_summary, produce_name)
            target_vector.append(recall[0] * precision[0] * F_measure[0])
            recall_vector.append(recall[0])
            precision_vector.append(precision[0])
            F_measure_vector.append(F_measure[0])
        shutil.rmtree(newpath)
    # Normalise the first three features by their totals over the training set.
    for i in sample_vector:
        i[0] = i[0] / imp
        i[1] = i[1] / ti_s
        i[2] = i[2] / to_s
    # Train/test splits (sklearn's cross_validation module became model_selection).
    # The split halves are currently unused: the regressors are fit on the full
    # set and tuned via GridSearchCV's own 5-fold cross-validation.
    x_recall_train, x_recall_test, y_recall_train, y_recall_test = train_test_split(
        sample_vector, recall_vector, test_size=0.2, random_state=0)
    x_precision_train, x_precision_test, y_precision_train, y_precision_test = train_test_split(
        sample_vector, precision_vector, test_size=0.2, random_state=0)
    x_fmeasure_train, x_fmeasure_test, y_fmeasure_train, y_fmeasure_test = train_test_split(
        sample_vector, F_measure_vector, test_size=0.2, random_state=0)
    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5],
                         'C': [1, 10, 100, 1000]},
                        {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
    r_clf = GridSearchCV(SVR(C=1, epsilon=0.2), tuned_parameters, cv=5)
    p_clf = GridSearchCV(SVR(C=1, epsilon=0.2), tuned_parameters, cv=5)
    f_clf = GridSearchCV(SVR(C=1, epsilon=0.2), tuned_parameters, cv=5)
    r_clf.fit(sample_vector, recall_vector)
    p_clf.fit(sample_vector, precision_vector)
    f_clf.fit(sample_vector, F_measure_vector)
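    # Evaluation on the held-out threads: for each mail, the sentence with the
    # highest predicted F-measure is our one-sentence summary, and the sentence
    # with the highest raw importance score is the `lex` baseline.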
    index = 0
    produce_summary = []
    produce_name = []
    standard_summary = []
    standard_name = []
    lex_summary = []
    lex_name = []
    for i in mails:
        # Skip the threads that were used for training.
        if index < train:
            index += 1
            continue
        # Two problematic threads are excluded from evaluation.
        if i.name == 'Re:_StarOffice' or i.name == 'Try_Unsubscribing&ndash;&ndash;You_Can\'t':
            continue
        vec, important, title_sim = process_mail(i)
        topic_similarity = lda_process(vec)
        phrase_list, vec_phrase_index = phrase_extraction(vec)
        word_list = word_score_calculate(phrase_list)
        vec_phrase_score = important_word(vec, word_list, phrase_list, vec_phrase_index)
        cue_word_score = cue_word(vec)
        standard_summary.append(anno(i))
        tmp_name = []
        for j in range(len(standard_summary[-1])):
            tmp_name.append('PythonROUGE/' + i.name + '/' + str(j) + '_standard.txt')
        standard_name.append(tmp_name)
        newpath = 'PythonROUGE/' + i.name
        if not os.path.exists(newpath):
            os.makedirs(newpath)
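        # Predict a ROUGE F-measure for every sentence of this thread, then
        # rank the sentences by predicted score and by raw importance.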
        tmp_summary = []
        predict_rouge = []
        for j in range(len(vec)):
            # Alternative scoring: the product of all three models' predictions
            # (r_clf * p_clf * f_clf) was also tried here.
            tmp = f_clf.predict([[important[j], title_sim[j], topic_similarity[j],
                                  vec_phrase_score[j], cue_word_score[j]]])[0]
            predict_rouge.append(tmp)
        sort_index = numpy.argsort(predict_rouge)[::-1]   # ranked by predicted F-measure
        sort_index2 = numpy.argsort(important)[::-1]      # ranked by raw importance (lex baseline)
        # Alternative: keep the top-10 sentences instead of only the best one.
        #for j in range(10):
        #    tmp_summary.append(vec[sort_index[j]].sentence)
        #    tmp_name.append('PythonROUGE/'+i.name+'/'+str(j)+'.txt')
        lex_summary.append([vec[sort_index2[0]].sentence])  # wrapped in a list to match produce_summary's shape
        tmp_summary.append(vec[sort_index[0]].sentence)
        # j is the last sentence index here; the per-thread directory keeps the names unique.
        produce_name.append('PythonROUGE/' + i.name + '/' + str(j) + '.txt')
        lex_name.append('PythonROUGE/' + i.name + '/' + str(j) + '_lex' + '.txt')
        produce_summary.append(tmp_summary)
    recall, precision, F_measure = summary_eval(standard_summary, standard_name,
                                                produce_summary, produce_name)
    print('recall:')
    print(recall)
    print('precision:')
    print(precision)
    print('F_measure:')
    print(F_measure)
    recall, precision, F_measure = summary_eval(standard_summary, standard_name,
                                                lex_summary, lex_name)
    print('lex_recall:')
    print(recall)
    print('lex_precision:')
    print(precision)
    print('lex_F_measure:')
    print(F_measure)
    return f_clf
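
# Entry-point guard; an addition here, on the assumption that this module is
# run as a script (the original file may invoke main() elsewhere).
if __name__ == '__main__':
    main()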