def agreementRates(clf, valLabels, uLabels, plot=False):
    '''
    Compute majority-vote labels and per-sample agreement for the validation
    predictions in ``clf['pred_val']``, then score every classifier with
    ``f1_score`` at each agreement threshold so a threshold that maximizes
    performance can be chosen.

    Parameters
    ----------
    clf : dict
        Classifiers dictionary (per the original notes, generated by
        clfsEval). ``clf['pred_val']`` is read as one prediction sequence per
        classifier, all of the same length. Two keys are written in place:
        ``'agmntLevels'`` (sorted unique agreement values) and
        ``'f1_score_val_predval_agmnt'`` (one row per agreement level, one
        column per classifier).
    valLabels : array-like
        Ground-truth labels; indexed with an integer index array, so it is
        presumably a numpy array -- TODO confirm against callers.
    uLabels : array-like
        Forwarded unchanged as ``labels=`` to ``f1_score``.
    plot : bool, optional
        When truthy, plot the f1 scores against the agreement levels.

    Returns
    -------
    mvLabels : list
        Majority-vote label per sample; ties are broken at random.
    agmnt : list
        Agreement value per sample:
        ``dS.expSmooth(sorted counts, 0.95) / number of classifiers``.
        NOTE(review): the exact meaning depends on ``dS.expSmooth``, which is
        not visible here.
    '''
    clfsLabels = np.array(list(clf['pred_val']))
    agmnt = []
    mvLabels = []

    # Calculation of the majority vote labels.
    # Use shape[1] for the sample count: indexing row 1 (clfsLabels[1]) fails
    # when only a single classifier is present.
    for i in range(clfsLabels.shape[1]):
        sampleVotes = clfsLabels[:, i]
        cts = dS.counts(sampleVotes)
        agmnt.append(dS.expSmooth(sorted(cts['counts']), 0.95) / len(sampleVotes))

        topCount = max(cts['counts'])
        winners = np.argwhere(np.array(cts['counts']) == topCount)
        if len(winners) == 1:
            winner = int(winners[0][0])
        else:
            # Tie: throw the dice. argwhere rows have shape (1,), so take the
            # scalar out; otherwise the label is appended as a 1-element array
            # (or indexing fails outright when 'vals' is a plain list).
            winner = int(np.random.permutation(winners)[0][0])
        mvLabels.append(cts['vals'][winner])

    # np.unique already returns its values sorted.
    sAgmnt = np.unique(agmnt)
    clf['agmntLevels'] = sAgmnt

    # f1_score between the ground truth and each classifier's predictions,
    # keeping only the samples whose agreement reaches the current level.
    agmntArr = np.asarray(agmnt)
    f1_scores = []
    for level in sAgmnt:
        inds2 = np.argwhere(agmntArr >= level)
        row = []
        for preds in clf['pred_val']:
            try:
                row.append(f1_score(valLabels[inds2], preds[inds2], labels=uLabels))
            except Exception as err:
                print('problem at funcs agreement rates', err)
                # Keep the column aligned with the classifier index; dropping
                # the entry would shift later classifiers' scores to the left.
                row.append(np.nan)
        f1_scores.append(row)

    if plot:
        plt.plot(sAgmnt, f1_scores)
        plt.show()

    clf['f1_score_val_predval_agmnt'] = f1_scores
    return mvLabels, agmnt
def majorityVote(clfs, testingData, ignore=()):
    '''
    Predict ``testingData`` with a set of previously built classifiers and
    compute the per-sample majority vote together with an agreement value.

    Parameters
    ----------
    clfs : sequence
        Each ``clfs[i]['classifier'][0]`` must expose ``predict(testingData)``.
    testingData :
        Passed unchanged to every classifier's ``predict``.
    ignore : container of int, optional
        Indexes of the classifiers in ``clfs`` to leave out of the vote.

    Returns
    -------
    mV : list
        Majority-vote label per sample; ties are broken at random.
    agmnt : list
        Agreement value per sample:
        ``dS.expSmooth(sorted counts, 0.95) / number of voting classifiers``.
        NOTE(review): the exact meaning depends on ``dS.expSmooth``, which is
        not visible here.
    votes : numpy.ndarray
        Predictions of the classifiers that voted, one row per classifier.
    '''
    votes = np.array([
        clfs[i]['classifier'][0].predict(testingData)
        for i in range(len(clfs))
        if i not in ignore
    ])

    mV = []
    agmnt = []
    for i in range(votes.shape[1]):
        sampleVotes = votes[:, i]
        cts = dS.counts(sampleVotes)
        agmnt.append(dS.expSmooth(sorted(cts['counts']), 0.95) / len(sampleVotes))

        topCount = max(cts['counts'])
        winners = np.argwhere(np.array(cts['counts']) == topCount)
        if len(winners) == 1:
            winner = int(winners[0][0])
        else:
            # Tie: throw the dice. argwhere rows have shape (1,), so take the
            # scalar out; otherwise the label is appended as a 1-element array
            # (or indexing fails outright when 'vals' is a plain list).
            winner = int(np.random.permutation(winners)[0][0])
        mV.append(cts['vals'][winner])

    return mV, agmnt, votes