def evaluate(prediction,labels_test): labels_test = [0 if x=="neutral" else 1 if x=="positive" else -1 for x in labels_test] #logistic regression evaluation print "Average F1 : " +str(measures.avgF1(labels_test,prediction,-1,1)) #print "Baseline AverageF1 : " +str(measures.avgF1(labels_test,baseline_prediction)) print "Accuracy : " +str(measures.accuracy(labels_test,prediction)) #print "Baseline Accuracy : "+str(measures.accuracy(labels_test,baseline_prediction)) print "F1 negative : " +str(measures.F1(labels_test,prediction,-1)) print "F1 positive : " +str(measures.F1(labels_test,prediction,1)) print "Precision negative: " +str(measures.precision(labels_test,prediction,-1)) print "Precision positive: " +str(measures.precision(labels_test,prediction,1)) print "Recall negative : " +str(measures.recall(labels_test,prediction,-1)) print "Recall positive : " +str(measures.recall(labels_test,prediction,1))
def C_comparison(length, features_train, labels_train, features_test, labels_test):
    """Plot test-set average F1 of logistic regression as a function of
    the regularisation parameter C.

    Trains one model per candidate C value, scores it on the test split
    with measures.avgF1 (classes 0 and 1), and shows a line plot of
    F1 versus C.

    Parameters
    ----------
    length : unused; kept only for backward compatibility with callers.
    features_train, labels_train : training split fed to LogisticRegression.train.
    features_test, labels_test   : held-out split used for scoring.

    Returns nothing; displays a matplotlib window.
    """
    C = [0.001, 0.05, 0.1, 0.3, 0.5, 0.8, 1, 10, 100, 350, 500,
         1000, 3500, 5000, 10000, 50000, 100000]
    scores = []
    for c in C:
        model = LogisticRegression.train(features_train, labels_train, c)
        prediction = LogisticRegression.predict(features_test, model)
        scores.append(measures.avgF1(labels_test, prediction, 0, 1))

    # FIX: linewidth must be a number, not the string "2.0".
    # NOTE(review): C spans five orders of magnitude; a log-scaled x axis
    # (plt.xscale("log")) would likely read better -- left unchanged here.
    plt.plot(C, scores, color="blue", linewidth=2.0)
    plt.xticks(C)
    plt.ylabel("F1")
    plt.xlabel("C")
    plt.show()
def plotFeaturesF1(features_train, labels_train, features_test, labels_test):
    """Plot the individual predictive power of each feature column.

    For every column i of the feature matrices, trains a logistic
    regression on that single column alone, scores it on the test split
    with measures.avgF1 (classes 0 and 1), and plots F1 against the
    feature index.

    Parameters
    ----------
    features_train, features_test : 2-D arrays (samples x features);
        column-sliceable with [:, i] and reshape, i.e. numpy arrays.
    labels_train, labels_test : corresponding label sequences.

    Returns nothing; displays a matplotlib window.
    """
    n_features = len(features_train[0])
    x = list(np.arange(n_features))
    y = []
    for i in range(n_features):
        # Slice out column i and keep it 2-D, shape (n_samples, 1),
        # since the trainer expects a feature matrix, not a flat vector.
        f_train = features_train[:, i].reshape(-1, 1)
        f_test = features_test[:, i].reshape(-1, 1)
        model = LogisticRegression.train(f_train, labels_train)
        prediction = LogisticRegression.predict(f_test, model)
        y.append(measures.avgF1(labels_test, prediction, 0, 1))

    # FIX: linewidth must be a number, not the string "2.0".
    plt.plot(x, y, color="blue", linewidth=2.0)
    plt.ylabel("F1")
    plt.xlabel("# of Feature")
    plt.xticks(x)
    plt.show()