Example #1
0
    def run_EM_missing(self):
        """Run one-vs-rest EM crowd-label aggregation when some expert labels are missing.

        For each class label in [min_class_label, max_class_label):
        binarize targets/expert labels via self.binary_y_experty, randomly
        initialise classifier weights plus per-expert alpha (sensitivity)
        and beta (specificity), impute missing expert labels (coded -1),
        then alternate E/M steps until the log-likelihood improvement
        drops below self.CONV_THRESH or self.MAXITER iterations elapse.

        Per-class outputs are stored in self.results: 'weights', 'alpha',
        'beta', plus baseline logistic-regression weights 'weights_mv'
        (trained on averaged expert votes) and 'weights_at' (trained on
        the observed labels).
        """
        try:
            for class_no in range(self.min_class_label, self.max_class_label):
                # Binary (one-vs-rest) view of targets and expert labels for
                # this class; missing expert annotations are coded as -1.
                y_observed, experty_observed = self.binary_y_experty(class_no)
                #random initializations for this class label
                weights = np.random.random(self.F)
                alpha = np.random.random(self.E)  #expert sensitivity
                beta = np.random.random(self.E)   #expert specificity
                l =0
                iter = 0  # NOTE(review): shadows the builtin iter()
                while iter < self.MAXITER:
                    # First iteration
                    if not iter:
                        l_old = 0
                        expertcombined = np.array([])
                        
                        for e in xrange(self.E):
                            # Impute missing labels for expert e with a random
                            # class label.  NOTE(review): randrange() is
                            # evaluated once per expert, so every missing cell
                            # of that expert receives the SAME random value —
                            # confirm this is intended rather than one draw
                            # per missing entry.
                            experty_observed[e][experty_observed[e] == -1] = randrange(self.min_class_label,self.max_class_label+1)
                            #self.Training_experty[e] = self.experty[e][:self.Training_instances]
  
                        
                        # Stack every expert's label vector into one flat
                        # array, then reshape to (E, Training_instances).
                        for e in experty_observed:
                            expertcombined = np.append(expertcombined,experty_observed[e], axis=0)
                        
                        
                        
                        expertcombined = np.reshape(expertcombined, (self.E, self.Training_instances))
                        # Seed prediction: per-instance mean of expert votes.
                        y_predicted = np.average( expertcombined, axis=0)
                        y_average = y_predicted.copy()
                        #acc_MV = np.size(np.where((y_average.round())==y_observed))/float(self.Training_instances)
                        # Baseline classifiers fitted once per class: on the
                        # averaged votes ('weights_mv') and on the observed
                        # labels ('weights_at').
                        self.results['weights_mv'][class_no] = NR.logistic_regression(self.Training_x[:,1:].T,np.asarray(y_average).reshape(-1),verbose=False, MAXIT=10000)
                        self.results['weights_at'][class_no] = NR.logistic_regression(self.Training_x[:,1:].T,np.asarray(y_observed).reshape(-1),verbose=False, MAXIT=10000)

                    else :
                        # Later iterations: snapshot the previous estimates so
                        # the E-step runs on the parameters produced by the
                        # last M-step.
                        l_old = l
                        w_old = weights
                        alpha_old = alpha
                        beta_old = beta
    
                        # Re-estimate expert labels from the current
                        # sensitivity/specificity estimates.
                        experty_learnt = self.learn_experty_missing(alpha_old, beta_old, y_observed)

                        for e in experty_observed:
                            # Overwrite only the originally-missing positions
                            # (where self.experty[e] == -1).  np.where returns
                            # a tuple of index arrays; iterating it yields the
                            # index array itself, so this is fancy indexing.
                            missing_ids = np.where(self.experty[e] == -1)
                            for m in missing_ids:
                                experty_observed[e][m] = experty_learnt[e][m]
                        #print "experty :"
                        #pprint(experty_observed)
                        a = Utils.a_calculations(alpha_old, experty_observed,self.y_shape)
                        b = Utils.b_calculations(beta_old, experty_observed, self.y_shape)
                        # E-step
                        y_predicted = EM.Estep(self.Training_x, w_old, a, b)
                        y_predicted = np.asarray(y_predicted).reshape(-1)
    
                    # M-step
                    weights, alpha, beta = EM.Mstep(self.Training_x, y_predicted, experty_observed)
                    a = Utils.a_calculations(alpha, experty_observed, self.y_shape)
                    b = Utils.b_calculations(beta, experty_observed, self.y_shape)
    
                    l = self.calculate_loglikelihood(y_predicted, weights, a, b)
                    #acc_EM = np.size(np.where(y_observed==y_predicted.round()))/float(self.Training_instances)
                    # Converged when the log-likelihood has stopped improving
                    # (small absolute change AND not a decrease).
                    diff =  np.fabs(l-l_old)
                    if diff <= self.CONV_THRESH and l>=l_old : break
                    iter = iter+1
                    if self.verbose:
                        print "EM algorithm :","diff:",diff,"log:", l, "iteration:", iter
    
                # Persist the per-class EM estimates (alpha/beta rounded to
                # one decimal for reporting).
                self.results['weights'][class_no] = weights
                self.results['alpha'][class_no] = alpha.round(1)
                self.results['beta'][class_no] = beta.round(1)
                """self.results['loglikelihood'][class_no] = l
                self.results['EM_perf']['f1_Score'][class_no] = Utils.calculate_F1score(y_observed, y_predicted)
                self.results['MV_perf']['f1_Score'][class_no] = Utils.calculate_F1score(y_observed, y_average)
                self.results['EM_perf']['rmse'][class_no] = Utils.calculate_RMSE(y_observed, y_predicted)
                self.results['MV_perf']['rmse'][class_no] = Utils.calculate_RMSE(y_observed, y_average)
                self.results['experty'] [class_no] = experty_observed
                fig = plt.figure()
                ax = fig.add_subplot(111)
                ax.set_title('class :' + str(class_no))
                ax.set_ylim(-1,2)
                ax.plot(y_observed,'ro-',y_predicted,'-b.')"""
                
                if self.verbose:
                    print "alphacap :"
                    pprint (alpha.round(1))
                    print "betacap :"
                    pprint (beta.round(1))
                    print "weights :"
                    pprint (weights)
    
    
                    # NOTE(review): the f1_Score entries printed below are only
                    # assigned inside the commented-out block above — these
                    # reads may fail unless populated elsewhere; verify.
                    print "f1_Score of EM approach :"
                    print self.results['EM_perf']['f1_Score'][class_no]
    
                    print "f1_Score of majority voting approach :"
                    print self.results['MV_perf']['f1_Score'][class_no]
    
                    print "y"
                    print y_observed
                    print "y maj"
                    print y_average.round(2)
                    print "y pred"
                    print y_predicted.round(2)
    
                    print "Expert wrong percentage"
                    print self.expert_wrong_percentage
    
                    print '--'*30
     
        except Exception, e:
            # NOTE(review): caught only to re-raise unchanged ('e' is unused);
            # the whole try/except is a no-op and could be removed.
            raise
Example #2
0
    def run(self):
        """Run one-vs-rest EM crowd-label aggregation on fully-observed expert labels.

        For each class label in [min_class_label, max_class_label):
        binarize targets/expert labels via self.binary_y_experty, randomly
        initialise classifier weights plus per-expert alpha (sensitivity)
        and beta (specificity), seed the first iteration with expert-vote
        aggregates, then alternate E/M steps until the log-likelihood
        improvement drops below self.CONV_THRESH or self.MAXITER
        iterations elapse.

        Per-class outputs are stored in self.results: 'weights', 'alpha',
        'beta', plus baseline logistic-regression weights 'weights_mv'
        (majority vote), 'weights_avg' (averaged votes) and 'weights_at'
        (observed labels).
        """
        try:
            for class_no in range(self.min_class_label, self.max_class_label):
            #for class_no in range(0,3):
        #class_no = 1
                #print class_no
                # Binary (one-vs-rest) view of targets and expert labels.
                y_observed, experty_observed = self.binary_y_experty(class_no)
                #random initializations for this class label
                weights = np.random.random(self.F)
                alpha = np.random.random(self.E)  #expert sensitivity
                beta = np.random.random(self.E)   #expert specificity
                l = 0
                iter = 0  # NOTE(review): shadows the builtin iter()
                while iter < self.MAXITER:
                    # First iteration
                    if not iter:
                        l_old = 0
                        # Stack every expert's label vector into one flat
                        # array, then reshape to (E, Training_instances).
                        expertcombined = np.array([])
                        for e in experty_observed:
                            expertcombined = np.append(expertcombined,experty_observed[e], axis=0)

                        expertcombined = np.reshape(expertcombined, (self.E, self.Training_instances))
                        # Per-instance mean of expert votes.
                        y_average = np.average( expertcombined, axis=0)
                        #y_average = y_predicted.copy()
                        
                        # Fortran-order flatten + reshape transposes the
                        # array: result has shape (Training_instances, E),
                        # i.e. one row of expert votes per instance
                        # (equivalent to expertcombined.T here).
                        mv_expert_combined =  np.reshape(expertcombined,expertcombined.size,order='F').reshape(np.shape(expertcombined)[1],np.shape(expertcombined)[0])
                        y_predicted = np.array([])
                        
                        # Majority vote per instance: most frequent label via
                        # bincount/argmax (ties resolve to the smaller label).
                        for emv in mv_expert_combined:
                            y_predicted = np.append(y_predicted, np.bincount(emv.astype(int)).argmax())
                        
                        

                        # Accuracy of the rounded vote average against the
                        # observed labels (np.size over the np.where tuple
                        # counts the matching positions).
                        acc_MV = np.size(np.where((y_average.round())==y_observed))/float(self.Training_instances)

                        """
                        Classifier with MV as input
                        """
                        # Baseline classifiers: majority vote, averaged votes,
                        # and observed labels as training targets.
                        self.results['weights_mv'][class_no] = NR.logistic_regression(self.Training_x[:,1:].T,np.asarray(y_predicted).reshape(-1),verbose=False, MAXIT=10000)
                        self.results['weights_avg'][class_no] = NR.logistic_regression(self.Training_x[:,1:].T,np.asarray(y_average).reshape(-1),verbose=False, MAXIT=10000)
                        self.results['weights_at'][class_no] = NR.logistic_regression(self.Training_x[:,1:].T,np.asarray(y_observed).reshape(-1),verbose=False, MAXIT=10000)


                    else :
                        # Later iterations: snapshot the previous estimates so
                        # the E-step runs on the parameters produced by the
                        # last M-step.
                        l_old = l
                        w_old = weights
                        alpha_old = alpha
                        beta_old = beta

                        a = Utils.a_calculations(alpha_old, experty_observed,self.y_shape)
                        b = Utils.b_calculations(beta_old, experty_observed, self.y_shape)
                        # E-step
                        y_predicted = EM.Estep(self.Training_x, w_old, a, b)
                        y_predicted = np.asarray(y_predicted).reshape(-1)

                    # M-step
                    weights, alpha, beta = EM.Mstep(self.Training_x, y_predicted, experty_observed)
                    a = Utils.a_calculations(alpha, experty_observed, self.y_shape)
                    b = Utils.b_calculations(beta, experty_observed, self.y_shape)

                    l = self.calculate_loglikelihood(y_predicted, weights, a, b)
                    acc_EM = np.size(np.where(y_observed==y_predicted.round()))/float(self.Training_instances)
                    # Converged when the log-likelihood has stopped improving
                    # (small absolute change AND not a decrease).
                    diff =  np.fabs(l-l_old)
                    if diff <= self.CONV_THRESH and l>=l_old : break
                    iter = iter+1
                    if self.verbose:
                        print "EM algorithm :","diff:",diff,"log:", l, "iteration:", iter

                # Persist the per-class EM estimates (alpha/beta rounded to
                # one decimal for reporting).
                self.results['weights'][class_no] = weights
                self.results['alpha'][class_no] = alpha.round(1)
                self.results['beta'][class_no] = beta.round(1)
 
                if self.verbose:
                    print "alphacap :"
                    pprint (alpha.round(1))
                    print "betacap :"
                    pprint (beta.round(1))
                    print "weights :"
                    pprint (weights)


                    print "Accuracy of EM approach :"
                    print str(acc_EM)

                    print "Accuracy of majority voting approach :"
                    print str(acc_MV)

                    print "y"
                    print y_observed
                    print "y maj"
                    print y_average.round(2)
                    print "y pred"
                    print y_predicted.round(2)

                    print "Expert wrong percentage"
                    print self.expert_wrong_percentage

                    print '--'*30

        except:
            # NOTE(review): bare except that only re-raises — it is a no-op
            # and also catches KeyboardInterrupt/SystemExit; narrow or remove.
            #print "Running EM again --"
            raise