예제 #1
0
def loadData(trainOutput, trainInput, dualParams, kernelParams):
    """Build the global MMR model and load the training data into it.

    Creates a single-view ``cls_mmr`` instance bound to the module-level
    global ``cMMR``, loads the labelled data through ``armar_load_data``,
    splits the data so that every sample except the last belongs to the
    training set (presumably ``xselector`` entries equal to ``ifold`` mark
    the test set — confirm against ``split_train_test``), computes the
    kernels and attaches a fresh MMR solver.

    All four arguments are forwarded unchanged to
    ``armar_load_data.cls_label_files(trainOutput, trainInput, dualParams,
    kernelParams)``.

    Returns None; the result is the initialized global ``cMMR``.
    """
    global cMMR

    params = mmr_setparams.cls_params()
    nview = 1
    params.ninputview = nview
    cMMR = mmr_mmr_cls.cls_mmr(params.ninputview)

    ## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
    cdata_store = armar_load_data.cls_label_files(trainOutput, trainInput,
                                                  dualParams, kernelParams)
    cdata_store.load_mmr(cMMR)
    mdata = cMMR.mdata
    ## ############################################################
    # Mark every sample as training (1) except the last one (0 == fold 0).
    xselector = np.ones(mdata)
    xselector[-1] = 0
    ifold = 0
    cMMR.split_train_test(xselector, ifold)
    cMMR.compute_kernels()
    cMMR.csolver = mmr_solver_cls.cls_mmr_solver()
예제 #2
0
def loadData(trainOutput,trainInput,dualParams,kernelParams):
  """Build the global ``cMMR`` MMR model and load the training data into it.

  Creates a single-view ``cls_mmr`` instance bound to the module-level
  global ``cMMR``, loads the labelled data through ``armar_load_data``,
  splits training/test so that only the last sample is left out of the
  training set, computes the kernels and attaches a fresh MMR solver.
  All four arguments are forwarded unchanged to
  ``armar_load_data.cls_label_files``.

  Returns None; the result is the initialized global ``cMMR``.
  """
  global cMMR

  params=mmr_setparams.cls_params()
  nview=1
  params.ninputview=nview
  cMMR=mmr_mmr_cls.cls_mmr(params.ninputview)
  nfold=cMMR.nfold        # NOTE(review): unused in this function
  nrepeat=cMMR.nrepeat    # NOTE(review): unused in this function

  ## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  cdata_store=armar_load_data.cls_label_files(trainOutput,trainInput, \
                                              dualParams,kernelParams)
  cdata_store.load_mmr(cMMR)
  mdata=cMMR.mdata
  ## ############################################################
  # All samples are training (1) except the last one (0 == fold 0);
  # presumably entries equal to ifold mark the test set — confirm
  # against split_train_test.
  xselector=np.ones(mdata)
  xselector[-1]=0
  ifold=0
  cMMR.split_train_test(xselector,ifold)
  cMMR.compute_kernels()
  ## cMMR.Y0=cMMR.YKernel.get_train(cMMR.itrain)   ## candidates
  cMMR.csolver=mmr_solver_cls.cls_mmr_solver()
예제 #3
0
def mmr_main(iworkmode):
    """Run the full MMR train/validate/test experiment and print reports.

    For every input file bundle and feature view this builds a ``cls_mmr``
    model, splits the data into training and test folds (randomly or from a
    predefined index set, depending on ``crossval_mode``), tunes the
    (c, d, par1, par2) hyper-parameters by validation, trains with the best
    ones and evaluates accuracy/precision/recall/F1 on both training and
    test sets.  Per-fold, per-repetition and overall reports plus a LaTeX
    summary table are printed to stdout.

    iworkmode -- accepted for interface compatibility; not used here.
    Returns None; all results go to stdout.
    """

    params = mmr_setparams.cls_params()

    np.set_printoptions(precision=4)

    dresult = {}
    ## ---------------------------------------------
    nviews = 1
    lfile_in = [[[(3, 4, 21)]]]
    tfile_out = (3, 1, 6)

    lresult = []
    for iobject in range(len(lfile_in)):

        tfile_in = lfile_in[iobject]

        for ifeature in range(nviews):

            params.ninputview = len(tfile_in)
            cMMR = mmr_mmr_cls.cls_mmr(params.ninputview)
            nfold = cMMR.nfold
            nrepeat = cMMR.nrepeat
            cMMR.xbias = -0.95 - ifeature * 0.05
            print('Xbias:', cMMR.xbias)

            nscore = 4
            nipar = 1
            if cMMR.crossval_mode == 0:  ## random
                nfold0 = nfold
                xresult_test = np.zeros((nipar, nrepeat, nfold0))
                xresult_train = np.zeros((nipar, nrepeat, nfold0))
                xpr = np.zeros((nipar, nrepeat, nfold0, nscore))
            elif cMMR.crossval_mode == 1:  ## predefined training and test
                nrepeat = 1
                nfold0 = 1
                xresult_test = np.zeros((nipar, nrepeat, nfold0))
                xresult_train = np.zeros((nipar, nrepeat, nfold0))
                xpr = np.zeros((nipar, nrepeat, nfold0, nscore))

        ## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

            cdata_store = objpart_load_data.cls_label_files()
            cdata_store.load_mmr(cMMR, tfile_in, tfile_out)
            mdata = cMMR.mdata

            ## -----------------------------------------------
            print('Output kernel type: ',
                  cMMR.YKernel.kernel_params.kernel_type)
            for i in range(params.ninputview):
                print(i, 'Input kernel type: ',
                      cMMR.XKernel[i].kernel_params.kernel_type)
            ## -------------------------------------------------

            xtime = np.zeros(5)
            ## ############################################################
            nparam = 4  ## C,D,par1,par2
            xbest_param = np.zeros((nrepeat, nfold0, nparam))

            for iipar in range(nipar):

                print('===================================================')
                for irepeat in range(nrepeat):
                    ## split data into training and test
                    if cMMR.crossval_mode == 0:  ## random selection
                        xselector = np.floor(np.random.random(mdata) * nfold0)
                        xselector = xselector - (xselector == nfold0)
                    elif cMMR.crossval_mode == 1:  ## predefined training and test
                        xselector = np.zeros(mdata)
                        xselector[cMMR.ifixtrain] = 1

                    for ifold in range(nfold0):
                        cMMR.split_train_test(xselector, ifold)

                        ## validation to choose the best parameters
                        print('Validation')
                        t0 = time.perf_counter()
                        ## select the kernel to be validated
                        cMMR.set_validation()

                        cvalidation = mmr_validation_cls.cls_mmr_validation()
                        cvalidation.validation_rkernel = cMMR.XKernel[0].title
                        best_param = cvalidation.mmr_validation(cMMR)

                        xtime[0] = time.perf_counter() - t0

                        print('Best parameters found by validation')
                        print('c: ', best_param.c)
                        print('d: ', best_param.d)
                        print('par1: ', best_param.par1)
                        print('par2: ', best_param.par2)
                        xbest_param[irepeat, ifold, 0] = best_param.c
                        xbest_param[irepeat, ifold, 1] = best_param.d
                        xbest_param[irepeat, ifold, 2] = best_param.par1
                        xbest_param[irepeat, ifold, 3] = best_param.par2

                        cMMR.compute_kernels()
                        cMMR.Y0 = cMMR.YKernel.get_train(
                            cMMR.itrain)  ## candidates

                        ## training with the best parameters
                        print('Training')

                        print(cMMR.YKernel.kernel_params.kernel_type, \
                              cMMR.YKernel.kernel_params.ipar1, \
                              cMMR.YKernel.kernel_params.ipar2)
                        for iview in range(cMMR.ninputview):
                            print(cMMR.XKernel[iview].kernel_params.kernel_type, \
                                  cMMR.XKernel[iview].kernel_params.ipar1, \
                                  cMMR.XKernel[iview].kernel_params.ipar2)

                        t0 = time.perf_counter()
                        cOptDual = cMMR.mmr_train()
                        xtime[1] = time.perf_counter() - t0
                        ## cls transfers the dual variables to the test procedure
                        ## check the train accuracy
                        print('Test')
                        cPredictTra = cMMR.mmr_test(cOptDual, itraindata=0)
                        ## counts the proportion the ones predicted correctly
                        ## ######################################
                        if cMMR.itestmode == 2:
                            print('Test knn')
                            ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                              cPredictTra)
                        else:
                            ypred = cPredictTra.zPred
                        cEvaluationTra= \
                              mmr_eval_binvector(cMMR.YKernel.get_train(cMMR.itrain), \
                                                 ypred)
                        xresult_train[iipar, irepeat,
                                      ifold] = cEvaluationTra.accuracy
                        print('>>>>>>>>>>>\n', cEvaluationTra.confusion)
                        ## ######################################
                        ## check the test accuracy
                        t0 = time.perf_counter()
                        cPredictTes = cMMR.mmr_test(cOptDual, itraindata=1)
                        ## counts the proportion the ones predicted correctly
                        if cMMR.itestmode == 2:
                            ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                              cPredictTes)
                        else:
                            ypred = cPredictTes.zPred
                        cEvaluationTes= \
                              mmr_eval_binvector(cMMR.YKernel.get_test(cMMR.itest), \
                                                 ypred)

                        xtime[2] = time.perf_counter() - t0
                        xresult_test[iipar, irepeat,
                                     ifold] = cEvaluationTes.accuracy

                        xpr[iipar, irepeat, ifold,
                            0] = cEvaluationTes.precision
                        xpr[iipar, irepeat, ifold, 1] = cEvaluationTes.recall
                        xpr[iipar, irepeat, ifold, 2] = cEvaluationTes.f1
                        xpr[iipar, irepeat, ifold, 3] = cEvaluationTes.accuracy

                        print(cEvaluationTes.confusion)
                        print(cEvaluationTes.classconfusion)
                        try:
                            xclassconfusion += cEvaluationTes.classconfusion
                        except NameError:
                            # first fold: allocate the running class-confusion sum
                            n = cEvaluationTes.classconfusion.shape[0]
                            xclassconfusion = np.zeros((n, n))
                            xclassconfusion += cEvaluationTes.classconfusion

            ## ####################################
                        print('Parameter:',iipar,'Repetition: ',irepeat, \
                              'Fold: ',ifold)
                        mmr_report('Result on one fold',
                                   xresult_train[iipar,irepeat,ifold], \
                                   xresult_test[iipar,irepeat,ifold], \
                                   xpr[iipar,irepeat,ifold,:])
                        print(
                            np.sum(xpr[iipar, irepeat, :ifold + 1, :], 0) /
                            (ifold + 1))

                    mmr_report('Result on one repetition',
                               np.mean(xresult_train[iipar,irepeat,:]), \
                               np.mean(xresult_test[iipar,irepeat,:]), \
                               np.mean(xpr[iipar,irepeat,:,:],0))

                mmr_report('Result on all repetitions @@@@@@@',
                           np.mean(xresult_train[iipar,:,:].flatten()), \
                           np.mean(xresult_test[iipar,:,:].flatten()), \
                           np.mean(np.mean(xpr[iipar,:,:,:],0),0))

                print('Average best parameters')
                xlabels = ('c', 'd', 'par1', 'par2')
                for i in range(nparam):
                    print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                          '(',np.std(xbest_param[:,:,i]),')')

                print('xtime:', xtime)
                sys.stdout.flush()

                dresult[ifeature] = (cMMR.xbias,
                                     np.mean(np.mean(xpr[iipar, :, :, :], 0),
                                             0))

        for sfeature_type, tresult in dresult.items():
            headkey = tfile_in[0][0]
            xhead = cdata_store.dirvar[headkey][0] + ', ' + cdata_store.dirvar[
                headkey][1]
            lresult.append((xhead, tresult))

        # LaTeX summary table: one row per feature type.
        print('\\begin{tabular}{l|rrr}')
        print('& \\multicolumn{3}{c}{' + 'Objects' + '} \\\\')
        print('Feature type & Precision & Recall & F1 \\\\ \\hline')
        for litem in lresult:
            print(litem[0],' & ','%6.4f'%litem[1][1][0], \
                  ' & ','%6.4f'%litem[1][1][1],' & ','%6.4f'%litem[1][1][2],' \\\\')
        print('\\end{tabular}')

    print(xclassconfusion)

    print('Bye')

    return
예제 #4
0
def test_mvm_main(workmode):
  """Run the MVM (matrix completion) train/validate/test experiment.

  For each of ``npar`` parameter settings this loads a random label
  matrix, tunes the (c, d, par1, par2) hyper-parameters by validation,
  trains the MVM model, evaluates it over repetitions and folds, saves
  per-fold results to a CSV file and prints a summary table with the mean
  accuracy and the average solver time.

  workmode -- accepted for interface compatibility; not used here.
  Returns None; results go to stdout and to the CSV files named below.
  """

  params=mmr_setparams.cls_params()   # NOTE(review): unused in this function

  xdatacls=mvm_mvm_cls.cls_mvm()
  nfold=xdatacls.nfold
  if xdatacls.itestmode==0:
    nfold0=1        ## active learning
  else:
    nfold0=nfold    ## n-fold cross validation

  nparacc=2   ## rmse, time
  npar=1
  xsummary=np.zeros((npar,nparacc))

  ifile=0
  pselect=0.05
  itrates=1
  print('ifile:',ifile)
  print('itrates:',itrates)
  print('pselect:',pselect)
  lfiles=[]

  for ipar in range(npar):

    rmatrix=mvm_random_matrix.cls_label_files()
    (xdata,nrow2,ncol2)=rmatrix.load(ifile,pselect,itrain=itrates)
    xdatacls.load_data(xdata,xdatacls.categorymax, \
                       int(nrow2),int(ncol2),None)
    scombine=''
    # The result file name encodes the test mode / bootstrap strategy.
    if xdatacls.itestmode==0:
      if xdatacls.ibootstrap==0:
        fname='xresultte_rand'+scombine+'.csv'
      elif xdatacls.ibootstrap==1:
        fname='xresultte_active'+scombine+'.csv'
      elif xdatacls.ibootstrap==2:
        fname='xresultte_greedy'+scombine+'.csv'
      elif xdatacls.ibootstrap==3:
        fname='xresultte_act_rand'+scombine+'.csv'
    else:
      fname='xresultte_ncross'+scombine+'.csv'

    xdatacls.YKernel.ymax=1
    # it will be recomputed in mvm_ranges
    xdatacls.YKernel.ymin=-1
    xdatacls.YKernel.yrange=200 # it will be recomputed in classcol_ranges
    xdatacls.YKernel.ystep=(xdatacls.YKernel.ymax-xdatacls.YKernel.ymin) \
                            /xdatacls.YKernel.yrange
    ##  set_printoptions(precision=4)
    nparam=4    # C,D,par1,par2
    nreport=4   ## accuracy, precision, recall, f1

    # nrepeat0/nfold0 are (re)computed here, overriding the value above.
    xdatacls.prepare_repetition_folding(init_train_size=100)
    nrepeat0=xdatacls.nrepeat0
    nfold0=xdatacls.nfold0

    creport=mmr_report_cls.cls_mmr_report()
    creport.create_xaprf(nrepeat=nrepeat0,nfold=nfold0,nreport=nreport)
    xbest_param=np.zeros((nrepeat0,nfold0,nparam))

    # ############################################################

    nval=max(xdatacls.YKernel.valrange)+1
    xconfusion3=np.zeros((nrepeat0,nfold0,xdatacls.YKernel.ndim,nval,nval))

    xsolvertime=0.0
    ireport=0
    for irepeat in range(nrepeat0):

      xdatacls.prepare_repetition_training()

      for ifold in range(nfold0):

        xdatacls.prepare_fold_training(ifold)

    # validation to choose the best parameters
        print('Validation')
        xdatacls.set_validation()
        cvalidation=mvm_validation_cls.cls_mvm_validation()
        cvalidation.validation_rkernel=xdatacls.XKernel[0].title
        best_param=cvalidation.mvm_validation(xdatacls)

        print('Parameters:',best_param.c,best_param.d, \
              best_param.par1,best_param.par2)

        print('Best parameters found by validation')
        xbest_param[irepeat,ifold,0]=best_param.c
        xbest_param[irepeat,ifold,1]=best_param.d
        xbest_param[irepeat,ifold,2]=best_param.par1
        xbest_param[irepeat,ifold,3]=best_param.par2

    # training with the best parameters
        print('training')

        time0=time.time()
        cOptDual= xdatacls.mvm_train()
        xsolvertime+=xdatacls.solvertime
        print('Training time:',time.time()-time0)
        sys.stdout.flush()

    # check the train accuracy
        print('test on training')

    # check the test accuracy
        print('test on test')
        time0=time.time()
        cPredict=xdatacls.mvm_test()
        print('Test time:',time.time()-time0)
        sys.stdout.flush()

    # counts the proportion the ones predicted correctly
    # ####################################
        time0=time.time()
        (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                          xdatacls.nrow,xdatacls,cPredict.Zrow)
        print('Evaluation time:',time.time()-time0)
        (qtest,qpred)=makearray(xdatacls,cPredict.Zrow)

        # ieval_type==10 additionally records the per-dimension confusion tensor.
        if xdatacls.ieval_type==0:
          creport.set_xaprf(irepeat,ifold,cEval)
        elif xdatacls.ieval_type==10:
          creport.set_xaprf(irepeat,ifold,cEval)
          xconfusion3[irepeat,ifold]=cEval.xconfusion3
        else:
          creport.set_xaprf(irepeat,ifold,cEval)

        # candidates proposed by the evaluator (presumably for the active
        # learning modes — confirm against mvm_eval)
        xdatacls.icandidate_w=xdatacls.itest[icandidate_w]
        xdatacls.icandidate_b=xdatacls.itest[icandidate_b]
        ireport+=1

        ## print(cEval.xconfusion)
        if xdatacls.ieval_type==0:
          for xconfrow in cEval.xconfusion:
            for ditem in xconfrow:
              print('%7.0f'%ditem,end='')
            print()
          print()
        elif xdatacls.ieval_type==10:
          # print each confusion table normalized to percentages
          for xtable in cEval.xconfusion3:
            xsum=np.sum(xtable)
            if xsum==0:
              xsum=1
            xtable=100*xtable/xsum
            for xconfrow in xtable:
              for ditem in xconfrow:
                print('%9.4f'%ditem,end='')
              print()
            print()
          print()

    # ####################################
        print('*** ipar, repeatation, fold ***')
        print(ipar,irepeat,ifold)

        if xdatacls.itestmode==1: ## n-fold crossvalidation

          creport.report_prf(xmask=[irepeat,ifold], \
                             stitle='Result in one fold and one repetation', \
                             ssubtitle='Accuracy on test')

      creport.report_prf(xmask=[irepeat,None], \
                         stitle='Result in one repetation', \
                         ssubtitle='Mean and std of the accuracy on test')

      sys.stdout.flush()

      if xdatacls.itestmode==0: ## n-fold crossvalidation
        np.savetxt(fname,creport.xresulttes[:ireport,0,:],delimiter=',', \
                   fmt='%6.4f')
      else:
        if xdatacls.ieval_type==0:
          np.savetxt(fname,np.squeeze(creport.xaprf),delimiter=',', \
                     fmt='%6.4f')
        else:
          np.savetxt(fname,creport.xaprf[:,:,0],delimiter=',',fmt='%6.4f')

    (xmean,xstd)=creport.report_prf(xmask=[None,None], \
                     stitle='***** Overall result ****', \
                     ssubtitle='Mean and std of the accuracy on test + error')

    xsummary[ipar,0]=xmean[0]
    xsummary[ipar,1]=xsolvertime/(nrepeat0*nfold0)

    if xdatacls.ieval_type==10:
      confusion_latex(xconfusion3,lfiles)

    print('Average best parameters')
    xlabels=('c','d','par1','par2')
    for i in range(nparam):
      print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
              '(',np.std(xbest_param[:,:,i]),')')

  print('$$$$$$$$$ Summary results:')
  (m,n)=xsummary.shape
  for i in range(m):
    for j in range(n):
      print('%10.4f'%xsummary[i,j],end='')
    print()

  ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
  print('Bye')

  return
예제 #5
0
def mmr_main(iworkmode):
  """Run the full MMR train/validate/test experiment and print reports.

  For every input file bundle and feature view this builds a ``cls_mmr``
  model, splits the data into training and test folds (randomly or from a
  predefined index set, depending on ``crossval_mode``), tunes the
  (c, d, par1, par2) hyper-parameters by validation, trains with the best
  ones and evaluates accuracy/precision/recall/F1 on both training and
  test sets.  Per-fold, per-repetition and overall reports plus a LaTeX
  summary table are printed to stdout.

  iworkmode -- accepted for interface compatibility; not used here.
  Returns None; all results go to stdout.
  """

  params=mmr_setparams.cls_params()

  np.set_printoptions(precision=4)

  dresult={}
## ---------------------------------------------
  nviews=1
  lfile_in=[[[(3,4,21)]]]
  tfile_out=(3,1,6)

  lresult=[]
  for iobject in range(len(lfile_in)):

    tfile_in=lfile_in[iobject]

    for ifeature in range(nviews):

      params.ninputview=len(tfile_in)
      cMMR=mmr_mmr_cls.cls_mmr(params.ninputview)
      nfold=cMMR.nfold
      nrepeat=cMMR.nrepeat
      cMMR.xbias=-0.95-ifeature*0.05
      print('Xbias:',cMMR.xbias)

      nscore=4
      nipar=1
      if cMMR.crossval_mode==0:   ## random
        nfold0=nfold
        xresult_test=np.zeros((nipar,nrepeat,nfold0))
        xresult_train=np.zeros((nipar,nrepeat,nfold0))
        xpr=np.zeros((nipar,nrepeat,nfold0,nscore))
      elif cMMR.crossval_mode==1:  ## predefined training and test
        nrepeat=1
        nfold0=1
        xresult_test=np.zeros((nipar,nrepeat,nfold0))
        xresult_train=np.zeros((nipar,nrepeat,nfold0))
        xpr=np.zeros((nipar,nrepeat,nfold0,nscore))

    ## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

      cdata_store=objpart_load_data.cls_label_files()
      cdata_store.load_mmr(cMMR,tfile_in,tfile_out)
      mdata=cMMR.mdata

      ## -----------------------------------------------
      print('Output kernel type: ',cMMR.YKernel.kernel_params.kernel_type)
      for i in range(params.ninputview):
        print(i,'Input kernel type: ',cMMR.XKernel[i].kernel_params.kernel_type)
      ## -------------------------------------------------

      xtime=np.zeros(5)
    ## ############################################################
      nparam=4    ## C,D,par1,par2
      xbest_param=np.zeros((nrepeat,nfold0,nparam))

      for iipar in range(nipar):

        print('===================================================')
        for irepeat in range(nrepeat):
        ## split data into training and test
          if cMMR.crossval_mode==0:  ## random selection
            xselector=np.floor(np.random.random(mdata)*nfold0)
            xselector=xselector-(xselector==nfold0)
          elif cMMR.crossval_mode==1: ## predefined training and test
            xselector=np.zeros(mdata)
            xselector[cMMR.ifixtrain]=1

          for ifold in range(nfold0):
            cMMR.split_train_test(xselector,ifold)

            ## validation to choose the best parameters
            print('Validation')
            t0=time.perf_counter()
            ## select the kernel to be validated
            cMMR.set_validation()

            cvalidation=mmr_validation_cls.cls_mmr_validation()
            cvalidation.validation_rkernel=cMMR.XKernel[0].title
            best_param=cvalidation.mmr_validation(cMMR)

            xtime[0]=time.perf_counter()-t0

            print('Best parameters found by validation')
            print('c: ',best_param.c)
            print('d: ',best_param.d)
            print('par1: ',best_param.par1)
            print('par2: ',best_param.par2)
            xbest_param[irepeat,ifold,0]=best_param.c
            xbest_param[irepeat,ifold,1]=best_param.d
            xbest_param[irepeat,ifold,2]=best_param.par1
            xbest_param[irepeat,ifold,3]=best_param.par2

            cMMR.compute_kernels()
            cMMR.Y0=cMMR.YKernel.get_train(cMMR.itrain)   ## candidates

      ## training with the best parameters
            print('Training')

            print(cMMR.YKernel.kernel_params.kernel_type, \
                  cMMR.YKernel.kernel_params.ipar1, \
                  cMMR.YKernel.kernel_params.ipar2)
            for iview in range(cMMR.ninputview):
              print(cMMR.XKernel[iview].kernel_params.kernel_type, \
                    cMMR.XKernel[iview].kernel_params.ipar1, \
                    cMMR.XKernel[iview].kernel_params.ipar2)

            t0=time.perf_counter()
            cOptDual=cMMR.mmr_train()
            xtime[1]=time.perf_counter()-t0
      ## cls transfers the dual variables to the test procedure
      ## check the train accuracy
            print('Test')
            cPredictTra=cMMR.mmr_test(cOptDual,itraindata=0)
      ## counts the proportion the ones predicted correctly
      ## ######################################
            if cMMR.itestmode==2:
              print('Test knn')
              ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                cPredictTra)
            else:
              ypred=cPredictTra.zPred
            cEvaluationTra= \
                  mmr_eval_binvector(cMMR.YKernel.get_train(cMMR.itrain), \
                                     ypred)
            xresult_train[iipar,irepeat,ifold]=cEvaluationTra.accuracy
            print('>>>>>>>>>>>\n',cEvaluationTra.confusion)
      ## ######################################
      ## check the test accuracy
            t0=time.perf_counter()
            cPredictTes= cMMR.mmr_test(cOptDual,itraindata=1)
      ## counts the proportion the ones predicted correctly
            if cMMR.itestmode==2:
              ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                cPredictTes)
            else:
              ypred=cPredictTes.zPred
            cEvaluationTes= \
                  mmr_eval_binvector(cMMR.YKernel.get_test(cMMR.itest), \
                                     ypred)

            xtime[2]=time.perf_counter()-t0
            xresult_test[iipar,irepeat,ifold]=cEvaluationTes.accuracy

            xpr[iipar,irepeat,ifold,0]=cEvaluationTes.precision
            xpr[iipar,irepeat,ifold,1]=cEvaluationTes.recall
            xpr[iipar,irepeat,ifold,2]=cEvaluationTes.f1
            xpr[iipar,irepeat,ifold,3]=cEvaluationTes.accuracy

            print(cEvaluationTes.confusion)
            print(cEvaluationTes.classconfusion)
            try:
              xclassconfusion+=cEvaluationTes.classconfusion
            except NameError:
              # first fold: allocate the running class-confusion sum
              n=cEvaluationTes.classconfusion.shape[0]
              xclassconfusion=np.zeros((n,n))
              xclassconfusion+=cEvaluationTes.classconfusion

      ## ####################################
            print('Parameter:',iipar,'Repetition: ',irepeat, \
                  'Fold: ',ifold)
            mmr_report('Result on one fold',
                       xresult_train[iipar,irepeat,ifold], \
                       xresult_test[iipar,irepeat,ifold], \
                       xpr[iipar,irepeat,ifold,:])
            print(np.sum(xpr[iipar,irepeat,:ifold+1,:],0)/(ifold+1))

          mmr_report('Result on one repetition',
                     np.mean(xresult_train[iipar,irepeat,:]), \
                     np.mean(xresult_test[iipar,irepeat,:]), \
                     np.mean(xpr[iipar,irepeat,:,:],0))

        mmr_report('Result on all repetitions @@@@@@@',
                   np.mean(xresult_train[iipar,:,:].flatten()), \
                   np.mean(xresult_test[iipar,:,:].flatten()), \
                   np.mean(np.mean(xpr[iipar,:,:,:],0),0))

        print('Average best parameters')
        xlabels=('c','d','par1','par2')
        for i in range(nparam):
          print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                '(',np.std(xbest_param[:,:,i]),')')

        print('xtime:',xtime)
        sys.stdout.flush()

        dresult[ifeature]=(cMMR.xbias,np.mean(np.mean(xpr[iipar,:,:,:],0),0))

    for sfeature_type,tresult in dresult.items():
      headkey=tfile_in[0][0]
      xhead=cdata_store.dirvar[headkey][0]+', '+cdata_store.dirvar[headkey][1]
      lresult.append((xhead,tresult))

    # LaTeX summary table: one row per feature type.
    print('\\begin{tabular}{l|rrr}')
    print('& \\multicolumn{3}{c}{'+'Objects'+'} \\\\')
    print('Feature type & Precision & Recall & F1 \\\\ \\hline')
    for litem in lresult:
      print(litem[0],' & ','%6.4f'%litem[1][1][0], \
            ' & ','%6.4f'%litem[1][1][1],' & ','%6.4f'%litem[1][1][2],' \\\\')
    print('\\end{tabular}')

  print(xclassconfusion)

  print('Bye')

  return
예제 #6
0
def mmr_main(iworkmode, trainingBase, evalFile, performcl):
    """Train an MMR model and evaluate a single held-out sample.

    Loads the labelled trajectory data (``trainingBase``/``evalFile``/
    ``performcl`` are forwarded to ``trajlab_load_data.cls_label_files``),
    forces the predefined-split mode, trains on all samples except the last
    one (presumably the sample to classify — confirm against the caller),
    validates the (c, d, par1, par2) hyper-parameters, trains and tests.

    Returns, from the first completed fold, ``evaluatedRes``: the first
    column of the class-confusion matrix with the validation score
    appended.  ``[-1]`` is returned only if the loops never reach that
    point.

    iworkmode -- accepted for interface compatibility; not used here.
    """

    params = mmr_setparams.cls_params()

    dresult = {}
    nview = 1

    nobject = 1

    ## ################################################
    lviews = [0, 1]  ### this list could contain a subset of 0,1,2
    ## ################################################

    lresult = []
    for iobject in range(nobject):

        for ifeature in range(nview):

            params.ninputview = len(lviews)
            cMMR = mmr_mmr_cls.cls_mmr(params.ninputview)
            nfold = cMMR.nfold
            nrepeat = cMMR.nrepeat
            ## cMMR.xbias=-0.06  ## 4 categories
            cMMR.xbias = 0.02 - ifeature * 0.01

            nscore = 4
            nipar = 1

            # Force the predefined training/test split mode.
            cMMR.crossval_mode = 1
            if cMMR.crossval_mode == 0:  ## random
                nfold0 = nfold
                xresult_test = np.zeros((nipar, nrepeat, nfold0))
                xresult_train = np.zeros((nipar, nrepeat, nfold0))
                xpr = np.zeros((nipar, nrepeat, nfold0, nscore))
            elif cMMR.crossval_mode == 1:  ## predefined training and test
                nrepeat = 1
                nfold0 = 1
                xresult_test = np.zeros((nipar, nrepeat, nfold0))
                xresult_train = np.zeros((nipar, nrepeat, nfold0))
                xpr = np.zeros((nipar, nrepeat, nfold0, nscore))

            cdata_store = trajlab_load_data.cls_label_files(
                trainingBase, evalFile, performcl)
            cdata_store.load_mmr(cMMR, lviews)
            mdata = cMMR.mdata

            xcross = np.zeros((mdata, mdata))

            xtime = np.zeros(5)
            ## ############################################################
            nparam = 4  ## C,D,par1,par2
            xbest_param = np.zeros((nrepeat, nfold0, nparam))

            for iipar in range(nipar):

                for irepeat in range(nrepeat):
                    ## split data into training and test
                    if cMMR.crossval_mode == 0:  ## random selection
                        xselector = np.zeros(mdata)
                        ifold = 0
                        for i in range(mdata):
                            xselector[i] = ifold
                            ifold += 1
                            if ifold >= nfold0:
                                ifold = 0
                        np.random.shuffle(xselector)
                    elif cMMR.crossval_mode == 1:  ## predefined training and test
                        # (added by simon) train with all data but the last one (not elegant, but works)
                        cMMR.ifixtrain = list(range(mdata - 1))
                        xselector = np.zeros(mdata)
                        xselector[cMMR.ifixtrain] = 1

                    for ifold in range(nfold0):
                        cMMR.split_train_test(xselector, ifold)

                        ## validation to choose the best parameters
                        t0 = time.perf_counter()

                        cMMR.set_validation()
                        cvalidation = mmr_validation_cls.cls_mmr_validation()
                        best_param = cvalidation.mmr_validation(cMMR)

                        xtime[0] = time.perf_counter() - t0
                        xbest_param[irepeat, ifold, 0] = best_param.c
                        xbest_param[irepeat, ifold, 1] = best_param.d
                        xbest_param[irepeat, ifold, 2] = best_param.par1
                        xbest_param[irepeat, ifold, 3] = best_param.par2

                        cMMR.compute_kernels()
                        cMMR.Y0 = cMMR.YKernel.get_train(
                            cMMR.itrain)  ## candidates

                        t0 = time.perf_counter()
                        cOptDual = cMMR.mmr_train()
                        xtime[1] = time.perf_counter() - t0
                        ## cls transfers the dual variables to the test procedure
                        ## check the train accuracy
                        cPredictTra = cMMR.mmr_test(cOptDual, itraindata=0)
                        ## counts the proportion the ones predicted correctly
                        ## ######################################
                        if cMMR.itestmode == 2:
                            ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                              cPredictTra)
                        else:
                            ypred = cPredictTra.zPred
                        cEvaluationTra= \
                              mmr_eval_binvector(cMMR.YKernel.get_train(cMMR.itrain), \
                                                 ypred)
                        xresult_train[iipar, irepeat,
                                      ifold] = cEvaluationTra.accuracy
                        ## ######################################
                        ## check the test accuracy
                        t0 = time.perf_counter()
                        cPredictTes = cMMR.mmr_test(cOptDual, itraindata=1)
                        ## counts the proportion the ones predicted correctly
                        if cMMR.itestmode == 2:
                            ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                              cPredictTes)
                        else:
                            ypred = cPredictTes.zPred
                        cEvaluationTes= \
                              mmr_eval_binvector(cMMR.YKernel.get_test(cMMR.itest), \
                                                 ypred)

                        xtime[2] = time.perf_counter() - t0
                        xresult_test[iipar, irepeat,
                                     ifold] = cEvaluationTes.accuracy

                        xpr[iipar, irepeat, ifold,
                            0] = cEvaluationTes.precision
                        xpr[iipar, irepeat, ifold, 1] = cEvaluationTes.recall
                        xpr[iipar, irepeat, ifold, 2] = cEvaluationTes.f1
                        xpr[iipar, irepeat, ifold, 3] = cEvaluationTes.accuracy

                        # (added by simon) for now i will just add the new data to
                        # the dataset with a random label and check the confusion
                        # matrix --> very ugly solution but i cant figure it out
                        # in a clean way
                        evaluatedRes = [
                            row[0] for row in cEvaluationTes.classconfusion
                        ]
                        evaluatedRes.append(cvalidation.validationScore)
                        # Early exit with the first fold's result; the
                        # unreachable statements that used to follow this
                        # return were removed.
                        return evaluatedRes

                sys.stdout.flush()

                dresult[ifeature] = (cMMR.xbias,
                                     np.mean(np.mean(xpr[iipar, :, :, :], 0),
                                             0))

        for sfeature_type, tresult in dresult.items():
            xhead = ''
            lresult.append((xhead, tresult))

    return [-1]
예제 #7
0
def mmr_main(iworkmode):
  """Cross-validated MMR training and evaluation on the vision data set.

  Per fold: validate hyper-parameters, train, evaluate on the train and
  test partitions, and print per-fold / per-repetition reports.  After
  all folds, retrain on the full data with the averaged best parameters
  and save the dual variables and kernel parameters to the files
  configured in ``vision_load_data``.

  Parameters
  ----------
  iworkmode :
      Work-mode selector; not read inside this function (kept for
      caller compatibility).

  Returns
  -------
  None
  """

  params=mmr_setparams.cls_params()
  np.set_printoptions(precision=4)
  
  dresult={}
## ---------------------------------------------
  nview=1
  nobject=1
  params.ninputview=nview

  lresult=[]

  for iobject in range(nobject):

    for ifeature in range(nview):

      cMMR=mmr_mmr_cls.cls_mmr(params.ninputview)
      nfold=cMMR.nfold
      nrepeat=cMMR.nrepeat
      ## cMMR.xbias=-0.06  ## 4 categories
      cMMR.xbias=0.0 
      ## cMMR.xbias=0.1-ifeature*0.01 
      print('Xbias:',cMMR.xbias)

      ## result arrays: nscore = (precision, recall, f1, accuracy)
      nscore=4
      nipar=1
      if cMMR.crossval_mode==0:   ## random folds
        nfold0=nfold
        xresult_test=np.zeros((nipar,nrepeat,nfold0))
        xresult_train=np.zeros((nipar,nrepeat,nfold0))
        xpr=np.zeros((nipar,nrepeat,nfold0,nscore))
      elif cMMR.crossval_mode==1:  ## predefined training and test
        nrepeat=1
        nfold0=1
        xresult_test=np.zeros((nipar,nrepeat,nfold0))
        xresult_train=np.zeros((nipar,nrepeat,nfold0))
        xpr=np.zeros((nipar,nrepeat,nfold0,nscore))

    ## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

      ## cMMR=mmr_mmr_cls.cls_mmr(params.ninputview)

      cdata_store=vision_load_data.cls_label_files()  
      cdata_store.load_mmr(cMMR)
      mdata=cMMR.mdata

      ## -----------------------------------------------
      print('Output kernel type: ',cMMR.YKernel.kernel_params.kernel_type)
      for i in range(params.ninputview):
        print(i,'Input kernel type: ',cMMR.XKernel[i].kernel_params.kernel_type)
      ## -------------------------------------------------

      ## NOTE(review): xcross is allocated but never read in this function
      xcross=np.zeros((mdata,mdata))

      ## xtime[0]=validation, [1]=training, [2]=test wall times
      xtime=np.zeros(5)
    ## ############################################################
      nparam=4    ## C,D,par1,par2
      xbest_param=np.zeros((nrepeat,nfold0,nparam))

      for iipar in range(nipar):
        
        print('===================================================')
        for irepeat in range(nrepeat):
        ## split data into training and test
          if cMMR.crossval_mode==0:  ## random selection
            ## assign fold labels 0..nfold0-1 round-robin, then shuffle
            xselector=np.zeros(mdata)
            ifold=0
            for i in range(mdata):
              xselector[i]=ifold
              ifold+=1
              if ifold>=nfold0:
                ifold=0
            np.random.shuffle(xselector)
            ## xselector=np.floor(np.random.random(mdata)*nfold0)
            ## xselector=xselector-(xselector==nfold0)
          elif cMMR.crossval_mode==1: ## predefined training and test
            xselector=np.zeros(mdata)
            xselector[cMMR.ifixtrain]=1

          for ifold in range(nfold0):
            cMMR.split_train_test(xselector,ifold)

            ## validation to choose the best parameters
            print('Validation')
            ## NOTE(review): time.clock() was removed in Python 3.8;
            ## time.perf_counter() is the portable replacement
            t0=time.clock()
            ## select the kernel to be validated
            cMMR.set_validation()

            cvalidation=mmr_validation_cls.cls_mmr_validation()
            cvalidation.validation_rkernel=cMMR.XKernel[0].title
            best_param=cvalidation.mmr_validation(cMMR)


            xtime[0]=time.clock()-t0

            print('Best parameters found by validation')
            print('c: ',best_param.c)
            print('d: ',best_param.d)
            print('par1: ',best_param.par1)
            print('par2: ',best_param.par2)
            xbest_param[irepeat,ifold,0]=best_param.c
            xbest_param[irepeat,ifold,1]=best_param.d
            xbest_param[irepeat,ifold,2]=best_param.par1
            xbest_param[irepeat,ifold,3]=best_param.par2

            cMMR.compute_kernels()
            cMMR.Y0=cMMR.YKernel.get_train(cMMR.itrain)   ## candidates

      ## training with the best parameters
            print('Training')

            print(cMMR.YKernel.kernel_params.kernel_type, \
                  cMMR.YKernel.kernel_params.ipar1, \
                  cMMR.YKernel.kernel_params.ipar2)
            for iview in range(cMMR.ninputview):
              print(cMMR.XKernel[iview].kernel_params.kernel_type, \
                    cMMR.XKernel[iview].kernel_params.ipar1, \
                    cMMR.XKernel[iview].kernel_params.ipar2)
              
            
            t0=time.clock()
            cOptDual=cMMR.mmr_train()
            xtime[1]=time.clock()-t0
      ## cls transfers the dual variables to the test procedure
      ## compute tests 
      ## check the train accuracy
            print('Test')
            cPredictTra=cMMR.mmr_test(cOptDual,itraindata=0)
      ## counts the proportion the ones predicted correctly    
      ## ######################################
            if cMMR.itestmode==2:
              print('Test knn')
              ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                cPredictTra)
            else:
              ypred=cPredictTra.zPred
            cEvaluationTra= \
                  mmr_eval_binvector(cMMR.YKernel.get_train(cMMR.itrain), \
                                     ypred)
            xresult_train[iipar,irepeat,ifold]=cEvaluationTra.accuracy
            print('>>>>>>>>>>>\n',cEvaluationTra.confusion)
      ## ######################################     
      ## check the test accuracy
            t0=time.clock()
            cPredictTes= cMMR.mmr_test(cOptDual,itraindata=1)
      ## counts the proportion the ones predicted correctly
            if cMMR.itestmode==2:
              ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                cPredictTes)
            else:
              ypred=cPredictTes.zPred
            ## cEvaluationTes=mmr_eval_binvector(cData.YTest,cPredictTes.zPred)
            cEvaluationTes= \
                  mmr_eval_binvector(cMMR.YKernel.get_test(cMMR.itest), \
                                     ypred)

            xtime[2]=time.clock()-t0
            xresult_test[iipar,irepeat,ifold]=cEvaluationTes.accuracy

            xpr[iipar,irepeat,ifold,0]=cEvaluationTes.precision
            xpr[iipar,irepeat,ifold,1]=cEvaluationTes.recall
            xpr[iipar,irepeat,ifold,2]=cEvaluationTes.f1
            xpr[iipar,irepeat,ifold,3]=cEvaluationTes.accuracy

            print(cEvaluationTes.confusion)
            print(cEvaluationTes.classconfusion)
            ## accumulate the class-confusion matrix across folds; on the
            ## first fold xclassconfusion does not exist yet, which the
            ## bare except absorbs.
            ## NOTE(review): a bare except also hides shape-mismatch
            ## errors -- `except NameError:` would be safer; confirm
            ## before changing
            try:
              xclassconfusion+=cEvaluationTes.classconfusion
            except:
              (n,n)=cEvaluationTes.classconfusion.shape
              xclassconfusion=np.zeros((n,n))
              xclassconfusion+=cEvaluationTes.classconfusion
            ## mmr_eval_label(ZW,iPre,YTesN,Y0,kit_data,itest,params)

      ## ####################################
            print('Parameter:',iipar,'Repetition: ',irepeat, \
                  'Fold: ',ifold)
            mmr_report('Result on one fold',
                       xresult_train[iipar,irepeat,ifold], \
                       xresult_test[iipar,irepeat,ifold], \
                       xpr[iipar,irepeat,ifold,:])
            print(np.sum(xpr[iipar,irepeat,:ifold+1,:],0)/(ifold+1))

          mmr_report('Result on one repetition',
                     np.mean(xresult_train[iipar,irepeat,:]), \
                     np.mean(xresult_test[iipar,irepeat,:]), \
                     np.mean(xpr[iipar,irepeat,:,:],0))

        mmr_report('Result on all repetitions @@@@@@@',
                   np.mean(xresult_train[iipar,:,:].flatten()), \
                   np.mean(xresult_test[iipar,:,:].flatten()), \
                   np.mean(np.mean(xpr[iipar,:,:,:],0),0))



        print('Average best parameters')
      ##  sfield=dir(best_param)
        xlabels=('c','d','par1','par2')
        for i in range(nparam):
      ##    print(sfield[i])
          print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                '(',np.std(xbest_param[:,:,i]),')')

        print('xtime:',xtime)
        sys.stdout.flush()

        dresult[ifeature]=(cMMR.xbias,np.mean(np.mean(xpr[iipar,:,:,:],0),0))

    for sfeature_type,tresult in dresult.items():
      ## xhead=cMMR.xbias
      xhead=''
      lresult.append((xhead,tresult))

    ## lresult.sort()
    ## for litem in lresult:
    ##   print(litem)

    ## emit the summary table as LaTeX
    print('\\begin{tabular}{l|rrr}')
    print('& \\multicolumn{3}{c}{'+'Objects'+'} \\\\')
    print('Feature type & Precision & Recall & F1 \\\\ \\hline')
    for litem in lresult:
      print(litem[0],' & ','%6.4f'%litem[1][1][0], \
            ' & ','%6.4f'%litem[1][1][1],' & ','%6.4f'%litem[1][1][2],' \\\\')
    print('\\end{tabular}')  

    ## print('\\begin{tabular}{l|rrr}')
    ## print('& \\multicolumn{3}{c}{'+'Objects'+'} \\\\')
    ## print('Feature & xbias & Precision & Recall & F1 \\\\ \\hline')
    ## for litem in lresult:
    ##   print(litem[0],' & ','%6.4f'%litem[1][0],' & ','%6.4f'%litem[1][1][0], \
    ##         ' & ','%6.4f'%litem[1][1][1],' & ','%6.4f'%litem[1][1][2],' \\\\')
    ## print('\\end{tabular}')  


  ## ##########################################################
  ## !!!! It saves the optimal dual variables, and optimal, crossvalidated,
  ##  kernel parameters into files given in vision_load_data.

  ## prepare full training with the best parameters:
  ## every sample goes into the training partition

  ifold=0
  xselector=np.ones(mdata)
  cMMR.split_train_test(xselector,ifold)
  best_param=np.array([ np.mean(xbest_param[:,:,i]) for i in range(nparam)])
  cMMR.penalty.c=best_param[0]
  cMMR.penalty.d=best_param[1]
  cMMR.XKernel[0].kernel_params.ipar1=best_param[2]
  cMMR.XKernel[0].kernel_params.ipar2=best_param[3]

  cMMR.compute_kernels()
  cMMR.Y0=cMMR.YKernel.get_train(cMMR.itrain)   ## candidates
  ## training with the best parameters
  print('Full training')
  cOptDual=cMMR.mmr_train()

  np.savetxt(cdata_store.sbasedir+cdata_store.dual_params,cMMR.dual.alpha, \
             fmt='%9.4f')
  np.savetxt(cdata_store.sbasedir+cdata_store.kernel_params,best_param[2:], \
             fmt='%9.4f')

  print(xclassconfusion)

  print('Bye')
  
  return
예제 #8
0
def mmr_main(iworkmode, trainingBase, evalFile, performcl):
  """Train an MMR classifier and score a single evaluation sample.

  The loader places the evaluation sample last in the data set; training
  uses every other item (predefined split, ``crossval_mode=1``), so the
  parameter/repetition/fold loops each execute exactly once and the
  function returns from inside the innermost loop.

  Fixes relative to the original version:
  * removed the code after ``return evaluatedRes`` (confusion-matrix
    accumulation, reporting, ``return [-1]``), which was unreachable;
  * removed the ``crossval_mode==0`` branch, dead because the mode is
    forced to 1 just above it;
  * removed unused locals (``dresult``, ``lresult``, ``xcross``);
  * replaced ``time.clock()`` (removed in Python 3.8) with
    ``time.perf_counter()``.

  Parameters
  ----------
  iworkmode :
      Unused; kept for interface compatibility with callers.
  trainingBase, evalFile, performcl :
      Forwarded verbatim to ``trajlab_load_data.cls_label_files``.

  Returns
  -------
  list
      The first column of the test class-confusion matrix with the
      validation score appended as the last element.
  """

  params=mmr_setparams.cls_params()

  nview=1
  nobject=1

  ## ################################################
  lviews=[0,1] ### this list could contain a subset of 0,1,2
  ## ################################################

  for iobject in range(nobject):

    for ifeature in range(nview):

      params.ninputview=len(lviews)
      cMMR=mmr_mmr_cls.cls_mmr(params.ninputview)
      ## cMMR.xbias=-0.06  ## 4 categories
      cMMR.xbias=0.02-ifeature*0.01

      ## nscore = (precision, recall, f1, accuracy)
      nscore=4
      nipar=1

      ## predefined training/test split: exactly one repetition and one fold
      cMMR.crossval_mode = 1
      nrepeat=1
      nfold0=1
      xresult_test=np.zeros((nipar,nrepeat,nfold0))
      xresult_train=np.zeros((nipar,nrepeat,nfold0))
      xpr=np.zeros((nipar,nrepeat,nfold0,nscore))

      cdata_store = trajlab_load_data.cls_label_files(trainingBase, evalFile, performcl)
      cdata_store.load_mmr(cMMR, lviews)
      mdata=cMMR.mdata

      ## xtime[0]=validation, [1]=training, [2]=test wall times
      xtime=np.zeros(5)
    ## ############################################################
      nparam=4    ## C,D,par1,par2
      xbest_param=np.zeros((nrepeat,nfold0,nparam))

      for iipar in range(nipar):

        for irepeat in range(nrepeat):
          ## train with all data but the last item (the evaluation sample)
          cMMR.ifixtrain = list(range(mdata - 1))
          xselector = np.zeros(mdata)
          xselector[cMMR.ifixtrain] = 1

          for ifold in range(nfold0):
            cMMR.split_train_test(xselector,ifold)

            ## validation chooses the best hyper-parameters
            t0 = time.perf_counter()
            cMMR.set_validation()
            cvalidation=mmr_validation_cls.cls_mmr_validation()
            best_param = cvalidation.mmr_validation(cMMR)

            xtime[0] = time.perf_counter() - t0
            xbest_param[irepeat,ifold,0]=best_param.c
            xbest_param[irepeat,ifold,1]=best_param.d
            xbest_param[irepeat,ifold,2]=best_param.par1
            xbest_param[irepeat,ifold,3]=best_param.par2

            cMMR.compute_kernels()
            cMMR.Y0=cMMR.YKernel.get_train(cMMR.itrain)   ## candidates

            ## training with the best parameters
            t0=time.perf_counter()
            cOptDual=cMMR.mmr_train()
            xtime[1]=time.perf_counter()-t0

            ## accuracy on the training partition
            cPredictTra = cMMR.mmr_test(cOptDual,itraindata=0)
            if cMMR.itestmode==2:
              ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                cPredictTra)
            else:
              ypred=cPredictTra.zPred
            cEvaluationTra= \
                  mmr_eval_binvector(cMMR.YKernel.get_train(cMMR.itrain), \
                                     ypred)
            xresult_train[iipar,irepeat,ifold]=cEvaluationTra.accuracy

            ## accuracy on the test partition (the evaluation sample)
            t0=time.perf_counter()
            cPredictTes = cMMR.mmr_test(cOptDual,itraindata=1)
            if cMMR.itestmode==2:
              ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                cPredictTes)
            else:
              ypred=cPredictTes.zPred
            cEvaluationTes= \
                  mmr_eval_binvector(cMMR.YKernel.get_test(cMMR.itest), \
                                     ypred)

            xtime[2] = time.perf_counter() - t0
            xresult_test[iipar,irepeat,ifold] = cEvaluationTes.accuracy

            xpr[iipar,irepeat,ifold,0]=cEvaluationTes.precision
            xpr[iipar,irepeat,ifold,1]=cEvaluationTes.recall
            xpr[iipar,irepeat,ifold,2]=cEvaluationTes.f1
            xpr[iipar,irepeat,ifold,3]=cEvaluationTes.accuracy

            ## first column of the class-confusion matrix plus the
            ## validation score; returned on this first (and only) fold
            evaluatedRes = [row[0] for row in cEvaluationTes.classconfusion]
            evaluatedRes.append(cvalidation.validationScore)
            return evaluatedRes
예제 #9
0
파일: roar_main.py 프로젝트: ipa-nhg/kukadu
def roar_main(workmode):
  """MVM driver for the ROAR data: active learning or n-fold cross-validation.

  Prepares the data via ``roar_prepare``, then per repetition/fold:
  splits the data, optionally grows the training set with an
  active-learning candidate, validates hyper-parameters, trains, tests,
  and appends the evaluation scores to a CSV file whose name encodes the
  test/bootstrap mode.

  Parameters
  ----------
  workmode :
      Work-mode selector; not read inside this function (kept for
      caller compatibility).

  Returns
  -------
  None
  """

  params=mmr_setparams.cls_params()
  params.setvalidation()
  params.setsolver()
  params.setgeneral()
  params.setoutput()
  params.setinput()

## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  xdatacls=mvm_mvm_cls.cls_mvm()

  roar_prepare.roar_prepare(xdatacls)

  nfold=xdatacls.nfold
  if xdatacls.itestmode in (0,3):
    nfold0=1        ## active learning
  else:
    nfold0=nfold    ## n-fold cross validation
  nrepeat=xdatacls.nrepeat

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  scombine=''

  ## output CSV name encodes the sampling strategy
  if xdatacls.itestmode==0:
    if xdatacls.ibootstrap==0:
      fname='xresultte_rand'+scombine+'.csv'
    elif xdatacls.ibootstrap==1:  
      fname='xresultte_active'+scombine+'.csv'
    elif xdatacls.ibootstrap==2:  
      fname='xresultte_greedy'+scombine+'.csv'
    elif xdatacls.ibootstrap==3:  
      fname='xresultte_act_rand'+scombine+'.csv'
  else:
    fname='xresultte_ncross'+scombine+'.csv'

  ## xdatacls.YKernel.ymax=ctables.ncategory
  # it will be recomputed in mvm_ranges
  xdatacls.YKernel.ymin=0
  xdatacls.YKernel.yrange=100 # it will be recomputed in classcol_ranges
  xdatacls.YKernel.ystep=1  

  # load the databases
  # data file
  ndata=xdatacls.ndata
  
##  set_printoptions(precision=4)
  npar=1   ## number of parameter selected for random subsample
  
  nparam=4    # C,D,par1,par2
  nreport=4   ## accuracy, precision, recall, f1

  if xdatacls.itestmode==0:
    nrepeat0=ndata-1   ## active learning
  else:
    nrepeat0=nrepeat

  if xdatacls.itestmode==0:
    ## initialize the active learning seeds
    ## pzero=0.001
    ## xselector=1*(np.random.rand(ndata)<pzero)

    ## pick nzero seed items by stepping through the data with a prime
    ## stride (deterministic pseudo-random coverage)
    nzero=100  ## !!!!!!!! initial training size
    xselector=np.zeros(ndata)
    nprime=4999
    ip=0
    for i in range(nzero):
      ip+=nprime
      if ip>ndata:
        ip=ip%ndata
      xselector[ip]=1  

    ndatainit=int(np.sum(xselector))
    mtest=ndata-ndatainit
    xdatacls.itest=np.where(xselector==0)[0]
    icandidate_w=-1
    icandidate_b=-1
    ## nrepeat0=ndata-ndatainit-10
    nrepeat0=min(100000,ndata-ndatainit-1000)  ## !!!!!! test size
    ## nrepeat0=1
  else:   ## n-fold cross validation
    nrepeat0=nrepeat
    
  xresulttr=np.zeros((nrepeat0,nfold0))
  xresultte=np.zeros((nrepeat0,nfold0,nreport))
  xbest_param=np.zeros((nrepeat0,nfold0,nparam))

  # ############################################################

  # number iterations in the optimization
  params.solver.niter=100
  print('niter:',params.solver.niter)

  for ipar in range(npar):

    nval=len(xdatacls.YKernel.valrange)
    xconfusion3=np.zeros((nrepeat0,nfold0,xdatacls.YKernel.ndim,nval,nval))

    ireport=0
    ## for irepeat in range(int(float(ndata)/3)):
    for irepeat in range(nrepeat0):

      ## grow the training set with the candidate chosen in the
      ## previous round (strategy depends on ibootstrap)
      if xdatacls.itestmode==0:
        if xdatacls.ibootstrap==0:    ## random candidate
          if icandidate_w>=0:
            icandidate_w=np.random.randint(mtest,size=1)
            icandidate_w=xdatacls.itest[icandidate_w]
            xselector[icandidate_w]=1
            ## xselector[icandidate_b]=0     ## delete the best 
        elif xdatacls.ibootstrap==1:  ## worst confidence
          if icandidate_w>=0:
            xselector[icandidate_w]=1
            ## xselector[icandidate_b]=0     ## delete the best 
        elif xdatacls.ibootstrap==2:  ## best confidence
          if icandidate_b>=0:
            xselector[icandidate_b]=1
        elif xdatacls.ibootstrap==3:  ## worst+random
          if icandidate_w>=0:
            pselect=np.random.rand()
            if pselect<0.5:
              icandidate_w=np.random.randint(mtest)
              icandidate_w=xdatacls.itest[icandidate_w]
            xselector[icandidate_w]=1
            ## xselector[icandidate_b]=0     ## delete the best
      elif xdatacls.itestmode==1:   ## n-fold cross-validation
        ## !!! Emre !!!
        xselector=np.floor(np.random.random(ndata)*nfold0)
        xselector=xselector-(xselector==nfold0)

      ## if xdatacls.itestmode==1:  ## n-fold crossvalidation
      ##   xselector=np.random.randint(nfold0, size=ndata)
      ## elif xdatacls.itestmode==2:  ## random subset
      ##   xselector=1*(np.random.rand(ndata)<float(plist[ipar])/100)
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
## for test only
      elif xdatacls.itestmode==-1:
        for i in range(ndata):
          xselector[i]=i%nfold0
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!          
##        xselector_row=np.floor(nfold0*np.random.rand(nrow))

      for ifold in range(nfold0):

        xdatacls.split_train_test(xselector,ifold)
        mtest=len(xdatacls.itest)
        if mtest<=0:
          ## nothing left to test; abandon this repetition
          print('!!!!!!!')
          break

        print('mtest:',mtest,'mtrain:',len(xdatacls.itrain))

        xdatacls.mvm_datasplit()        

    # sparse matrices of ranks-row_avarage-col_average+total_avarege  
        xdatacls.xranges_rel=mvm_ranges(xdatacls.xdata_tra,xdatacls.nrow, \
                                     params)
        xdatacls.xranges_rel_test=mvm_ranges(xdatacls.xdata_tes, \
                                          xdatacls.nrow,params)
        ## mvm_loadmatrix(xdatacls,isubset_tra,params)
        if xdatacls.category==0:
          mvm_glm(xdatacls,params)
          mvm_ygrid(xdatacls,params)
        elif xdatacls.category==1:
          mvm_largest_category(xdatacls)
        elif xdatacls.category==2:
          mvm_largest_category(xdatacls)

    # validation to choose the best parameters
        print('Validation')
        xdatacls.set_validation()
        params.validation.rkernel=xdatacls.XKernel[0].title
        if params.validation.rkernel in xdatacls.dkernels:
          kernbest=xdatacls.dkernels[params.validation.rkernel].kernel_params
        else:
          kernbest=xdatacls.XKernel[0].kernel_params
        
        ## either run the validation search or reuse the stored penalty
        ## and kernel parameters
        if params.validation.ivalid==1:
          best_param=mvm_validation(xdatacls,params)
        else:
          best_param=cls_empty_class()
          best_param.c=xdatacls.penalty.c
          best_param.d=xdatacls.penalty.d
          best_param.par1=kernbest.ipar1
          best_param.par2=kernbest.ipar2

        xdatacls.penalty.c=best_param.c
        xdatacls.penalty.d=best_param.d
        kernbest.ipar1=best_param.par1
        kernbest.ipar2=best_param.par2

        print('Parameters:',xdatacls.penalty.c,xdatacls.penalty.d, \
              kernbest.ipar1,kernbest.ipar2)
        
        print('Best parameters found by validation')
        xbest_param[irepeat,ifold,0]=best_param.c
        xbest_param[irepeat,ifold,1]=best_param.d
        xbest_param[irepeat,ifold,2]=best_param.par1
        xbest_param[irepeat,ifold,3]=best_param.par2

    # training with the best parameters
        print('training')

        time0=time.time()
        cOptDual= xdatacls.mvm_train(params)
        print('Training time:',time.time()-time0)
        
    # cls transfers the dual variables to the test procedure
    # compute test 

    # check the train accuracy
        print('test on training')

    # $$$ # counts the proportion the ones predicted correctly    
    # $$$ # ######################################
    # $$$     deval=col_eval(xdatacls.ieval_type,nrow,isubset_tra, \
    # $$$                      xranges_tra,Zrow)
    # $$$     xresulttr(irepeat,ifold)=deval
    # ######################################     
    # check the test accuracy
        print('test on test')
        time0=time.time()
        cPredict=xdatacls.mvm_test(cOptDual.alpha,params)
        print('Test time:',time.time()-time0)

    # counts the proportion the ones predicted correctly
    # ####################################
        time0=time.time()
        (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                          xdatacls.nrow,xdatacls,cPredict.Zrow)
        print('Evaluation time:',time.time()-time0)

        ## store the scores; layout depends on the evaluation type
        if xdatacls.ieval_type==0:
          xresultte[irepeat,ifold,0]=cEval.accuracy
          ## prediction of effective categories
          ## part_accuracy=float(np.sum(np.diag(cEval.xconfusion)[1:]))/ \
          ##           np.sum(cEval.xconfusion[1:,1:])
          ## xresultte[irepeat,ifold,1]=part_accuracy
          xresultte[irepeat,ifold,1]=cEval.precision
          xresultte[irepeat,ifold,2]=cEval.recall
          xresultte[irepeat,ifold,3]=cEval.f1
        elif xdatacls.ieval_type==10:
          xresultte[irepeat,ifold,0]=cEval.accuracy
          xconfusion3[irepeat,ifold]=cEval.xconfusion3
        else:
          xresultte[irepeat,ifold,0]=cEval.deval
        ## map candidate indices back to global data indices
        icandidate_w=xdatacls.itest[icandidate_w]
        icandidate_b=xdatacls.itest[icandidate_b]
        ireport+=1

        ## print(cEval.xconfusion)
        if xdatacls.ieval_type!=10:
          for xconfrow in cEval.xconfusion:
            for ditem in xconfrow:
              print('%7.0f'%ditem,end='')
            print()
          print()
        else:
          ## print each per-dimension confusion table as percentages
          for xtable in cEval.xconfusion3:
            xsum=np.sum(xtable)
            if xsum==0:
              xsum=1
            xtable=100*xtable/xsum
            for xconfrow in xtable:
              for ditem in xconfrow:
                print('%8.4f'%ditem,end='')
              print()
            print()
          print()
        
    # ####################################    
        print('*** ipar, repeatation, fold ***') 
        print(ipar,irepeat,ifold)

        if xdatacls.itestmode==1: ## n-fold crossvalidation
          print('Result in one fold and one repeatation')
          ## print('Accuracy on train')
          ## print(xresulttr[irepeat,ifold])
          print('Accuracy on test')
          if xdatacls.ieval_type==0:
            print(xresultte[irepeat,ifold])
          else:
            print(xresultte[irepeat,ifold,0])

      print('Result in one repetation')
      print('Mean and std of the accuracy on test')
      if xdatacls.ieval_type==0:
        print(np.mean(xresultte[irepeat,:,0]),
            np.std(xresultte[irepeat,:,0]))
      else:
        print(np.mean(xresultte[irepeat,:,0]),
            np.std(xresultte[irepeat,:,0]))
        
      sys.stdout.flush()
        
      ## append the scores collected so far to the CSV file
      if xdatacls.itestmode==0: ## n-fold crossvalidation
        np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
      else:
        if xdatacls.ieval_type==0:
          np.savetxt(fname,xresultte[:ireport,:,:],delimiter=',',fmt='%6.4f')
        else:
          np.savetxt(fname,xresultte[:ireport,:,0],delimiter=',',fmt='%6.4f')

    print('***** Overall result ****')
    print('Mean and std of the accuracy on test + error')
    if xdatacls.ieval_type==0:
      print(np.mean(xresultte[:,:,0]),
            np.std(xresultte[:,:,0]))
    else:
      print(np.mean(xresultte[:,:,0]),
            np.std(xresultte[:,:,0]))

#     if xdatacls.ieval_type==10:
#       confusion_latex(xconfusion3,lfiles)      
      
    print('Average best parameters')
    ##  sfield=dir(best_param)
    xlabels=('c','d','par1','par2')
    for i in range(nparam):
    ##    print(sfield[i])
      print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
              '(',np.std(xbest_param[:,:,i]),')')
  
  ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
  print('Bye')    
  
  return
예제 #10
0
def test_mvm_main(workmode):
    """Benchmark the MVM solver on randomly generated matrices.

    For each parameter setting: load a random matrix, run the
    repetition/fold schedule prepared by the data class (validation,
    training, test, evaluation), write per-round scores to a CSV file,
    and finally print a summary of mean accuracy and average solver
    time per setting.

    Parameters
    ----------
    workmode :
        Work-mode selector; not read inside this function (kept for
        caller compatibility).

    Returns
    -------
    None
    """

    params = mmr_setparams.cls_params()

    xdatacls = mvm_mvm_cls.cls_mvm()
    nfold = xdatacls.nfold
    if xdatacls.itestmode == 0:
        nfold0 = 1  ## active learning
    else:
        nfold0 = nfold  ## n-fold cross validation

    ## summary per parameter setting: [mean accuracy, avg solver time]
    nparacc = 2  ## rmse, time
    npar = 1
    xsummary = np.zeros((npar, nparacc))

    ifile = 0
    pselect = 0.05
    itrates = 1
    print('ifile:', ifile)
    print('itrates:', itrates)
    print('pselect:', pselect)
    lfiles = []

    for ipar in range(npar):

        ## generate/load a random sparse matrix for this setting
        rmatrix = mvm_random_matrix.cls_label_files()
        (xdata, nrow2, ncol2) = rmatrix.load(ifile, pselect, itrain=itrates)
        xdatacls.load_data(xdata,xdatacls.categorymax, \
                           int(nrow2),int(ncol2),None)
        ## output CSV name encodes the sampling strategy
        scombine = ''
        if xdatacls.itestmode == 0:
            if xdatacls.ibootstrap == 0:
                fname = 'xresultte_rand' + scombine + '.csv'
            elif xdatacls.ibootstrap == 1:
                fname = 'xresultte_active' + scombine + '.csv'
            elif xdatacls.ibootstrap == 2:
                fname = 'xresultte_greedy' + scombine + '.csv'
            elif xdatacls.ibootstrap == 3:
                fname = 'xresultte_act_rand' + scombine + '.csv'
        else:
            fname = 'xresultte_ncross' + scombine + '.csv'

        xdatacls.YKernel.ymax = 1
        # it will be recomputed in mvm_ranges
        xdatacls.YKernel.ymin = -1
        xdatacls.YKernel.yrange = 200  # it will be recomputed in classcol_ranges
        xdatacls.YKernel.ystep=(xdatacls.YKernel.ymax-xdatacls.YKernel.ymin) \
                                /xdatacls.YKernel.yrange
        ##  set_printoptions(precision=4)
        nparam = 4  # C,D,par1,par2
        nreport = 4  ## accuracy, precision, recall, f1

        ## the data class decides the repetition/fold schedule
        xdatacls.prepare_repetition_folding(init_train_size=100)
        nrepeat0 = xdatacls.nrepeat0
        nfold0 = xdatacls.nfold0

        creport = mmr_report_cls.cls_mmr_report()
        creport.create_xaprf(nrepeat=nrepeat0, nfold=nfold0, nreport=nreport)
        xbest_param = np.zeros((nrepeat0, nfold0, nparam))

        # ############################################################

        nval = max(xdatacls.YKernel.valrange) + 1
        xconfusion3 = np.zeros(
            (nrepeat0, nfold0, xdatacls.YKernel.ndim, nval, nval))

        xsolvertime = 0.0
        ireport = 0
        for irepeat in range(nrepeat0):

            xdatacls.prepare_repetition_training()

            for ifold in range(nfold0):

                xdatacls.prepare_fold_training(ifold)

                # validation to choose the best parameters
                print('Validation')
                xdatacls.set_validation()
                cvalidation = mvm_validation_cls.cls_mvm_validation()
                cvalidation.validation_rkernel = xdatacls.XKernel[0].title
                best_param = cvalidation.mvm_validation(xdatacls)

                print('Parameters:',best_param.c,best_param.d, \
                      best_param.par1,best_param.par2)

                print('Best parameters found by validation')
                xbest_param[irepeat, ifold, 0] = best_param.c
                xbest_param[irepeat, ifold, 1] = best_param.d
                xbest_param[irepeat, ifold, 2] = best_param.par1
                xbest_param[irepeat, ifold, 3] = best_param.par2

                # training with the best parameters
                print('training')

                time0 = time.time()
                cOptDual = xdatacls.mvm_train()
                ## accumulate pure solver time separately from wall time
                xsolvertime += xdatacls.solvertime
                print('Training time:', time.time() - time0)
                sys.stdout.flush()

                # check the train accuracy
                print('test on training')

                # check the test accuracy
                print('test on test')
                time0 = time.time()
                cPredict = xdatacls.mvm_test()
                print('Test time:', time.time() - time0)
                sys.stdout.flush()

                # counts the proportion the ones predicted correctly
                # ####################################
                time0 = time.time()
                (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                                  xdatacls.nrow,xdatacls,cPredict.Zrow)
                print('Evaluation time:', time.time() - time0)
                (qtest, qpred) = makearray(xdatacls, cPredict.Zrow)

                if xdatacls.ieval_type == 0:
                    creport.set_xaprf(irepeat, ifold, cEval)
                elif xdatacls.ieval_type == 10:
                    creport.set_xaprf(irepeat, ifold, cEval)
                    xconfusion3[irepeat, ifold] = cEval.xconfusion3
                else:
                    creport.set_xaprf(irepeat, ifold, cEval)

                ## map candidate indices back to global data indices
                xdatacls.icandidate_w = xdatacls.itest[icandidate_w]
                xdatacls.icandidate_b = xdatacls.itest[icandidate_b]
                ireport += 1

                ## print(cEval.xconfusion)
                if xdatacls.ieval_type == 0:
                    for xconfrow in cEval.xconfusion:
                        for ditem in xconfrow:
                            print('%7.0f' % ditem, end='')
                        print()
                    print()
                elif xdatacls.ieval_type == 10:
                    ## print each per-dimension confusion table as percentages
                    for xtable in cEval.xconfusion3:
                        xsum = np.sum(xtable)
                        if xsum == 0:
                            xsum = 1
                        xtable = 100 * xtable / xsum
                        for xconfrow in xtable:
                            for ditem in xconfrow:
                                print('%9.4f' % ditem, end='')
                            print()
                        print()
                    print()

        # ####################################
                print('*** ipar, repeatation, fold ***')
                print(ipar, irepeat, ifold)

                if xdatacls.itestmode == 1:  ## n-fold crossvalidation

                    creport.report_prf(xmask=[irepeat,ifold], \
                                       stitle='Result in one fold and one repetation', \
                                       ssubtitle='Accuracy on test')

            creport.report_prf(xmask=[irepeat,None], \
                               stitle='Result in one repetation', \
                               ssubtitle='Mean and std of the accuracy on test')

            sys.stdout.flush()

            ## append the scores collected so far to the CSV file
            if xdatacls.itestmode == 0:  ## n-fold crossvalidation
                np.savetxt(fname,creport.xresulttes[:ireport,0,:],delimiter=',', \
                           fmt='%6.4f')
            else:
                if xdatacls.ieval_type == 0:
                    np.savetxt(fname,np.squeeze(creport.xaprf),delimiter=',', \
                               fmt='%6.4f')
                else:
                    np.savetxt(fname,
                               creport.xaprf[:, :, 0],
                               delimiter=',',
                               fmt='%6.4f')

        (xmean,xstd)=creport.report_prf(xmask=[None,None], \
                         stitle='***** Overall result ****', \
                         ssubtitle='Mean and std of the accuracy on test + error')

        xsummary[ipar, 0] = xmean[0]
        xsummary[ipar, 1] = xsolvertime / (nrepeat0 * nfold0)

        if xdatacls.ieval_type == 10:
            confusion_latex(xconfusion3, lfiles)

        print('Average best parameters')
        xlabels = ('c', 'd', 'par1', 'par2')
        for i in range(nparam):
            print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                    '(',np.std(xbest_param[:,:,i]),')')

    print('$$$$$$$$$ Summary results:')
    (m, n) = xsummary.shape
    for i in range(m):
        for j in range(n):
            print('%10.4f' % xsummary[i, j], end='')
        print()

    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print('Bye')

    return
예제 #11
0
def roar_main(workmode):
    """Driver: train and evaluate an MVM model on the ROAR data.

    Depending on ``xdatacls.itestmode`` this runs either an active-learning
    loop (mode 0: a single fold, the training set grows by one sample per
    repetition, selection strategy chosen by ``ibootstrap``) or n-fold
    cross-validation (mode 1).  Per-fold accuracy/precision/recall/F1 are
    printed and periodically dumped to a CSV file whose name encodes the
    selection strategy.

    Parameters
    ----------
    workmode : unused in the visible body; kept for interface
        compatibility with the other ``*_main`` drivers.
    """

    params = mmr_setparams.cls_params()
    params.setvalidation()
    params.setsolver()
    params.setgeneral()
    params.setoutput()
    params.setinput()

    ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    xdatacls = mvm_mvm_cls.cls_mvm()

    roar_prepare.roar_prepare(xdatacls)

    nfold = xdatacls.nfold
    if xdatacls.itestmode in (0, 3):
        nfold0 = 1  ## active learning
    else:
        nfold0 = nfold  ## n-fold cross validation
    nrepeat = xdatacls.nrepeat

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    scombine = ''

    ## output CSV name encodes the candidate-selection strategy
    if xdatacls.itestmode == 0:
        if xdatacls.ibootstrap == 0:
            fname = 'xresultte_rand' + scombine + '.csv'
        elif xdatacls.ibootstrap == 1:
            fname = 'xresultte_active' + scombine + '.csv'
        elif xdatacls.ibootstrap == 2:
            fname = 'xresultte_greedy' + scombine + '.csv'
        elif xdatacls.ibootstrap == 3:
            fname = 'xresultte_act_rand' + scombine + '.csv'
    else:
        fname = 'xresultte_ncross' + scombine + '.csv'

    ## xdatacls.YKernel.ymax=ctables.ncategory
    # it will be recomputed in mvm_ranges
    xdatacls.YKernel.ymin = 0
    xdatacls.YKernel.yrange = 100  # it will be recomputed in classcol_ranges
    xdatacls.YKernel.ystep = 1

    # load the databases
    # data file
    ndata = xdatacls.ndata

    ##  set_printoptions(precision=4)
    npar = 1  ## number of parameter selected for random subsample

    nparam = 4  # C,D,par1,par2
    nreport = 4  ## accuracy, precision, recall, f1

    if xdatacls.itestmode == 0:
        nrepeat0 = ndata - 1  ## active learning
    else:
        nrepeat0 = nrepeat

    if xdatacls.itestmode == 0:
        ## initialize the active learning seeds
        ## pzero=0.001
        ## xselector=1*(np.random.rand(ndata)<pzero)

        nzero = 100  ## !!!!!!!! initial training size
        xselector = np.zeros(ndata)
        ## stride by a large constant (mod ndata) to spread seeds over the data
        nprime = 4999
        ip = 0
        for i in range(nzero):
            ip += nprime
            if ip > ndata:
                ip = ip % ndata
            xselector[ip] = 1

        ndatainit = int(np.sum(xselector))
        mtest = ndata - ndatainit
        xdatacls.itest = np.where(xselector == 0)[0]
        ## -1 marks "no candidate chosen yet" for the first repetition
        icandidate_w = -1
        icandidate_b = -1
        ## nrepeat0=ndata-ndatainit-10
        nrepeat0 = min(100000, ndata - ndatainit - 1000)  ## !!!!!! test size
        ## nrepeat0=1
    else:  ## n-fold cross validation
        nrepeat0 = nrepeat

    xresulttr = np.zeros((nrepeat0, nfold0))
    xresultte = np.zeros((nrepeat0, nfold0, nreport))
    xbest_param = np.zeros((nrepeat0, nfold0, nparam))

    # ############################################################

    # number iterations in the optimization
    params.solver.niter = 100
    print('niter:', params.solver.niter)

    for ipar in range(npar):

        nval = len(xdatacls.YKernel.valrange)
        xconfusion3 = np.zeros(
            (nrepeat0, nfold0, xdatacls.YKernel.ndim, nval, nval))

        ireport = 0
        ## for irepeat in range(int(float(ndata)/3)):
        for irepeat in range(nrepeat0):

            ## active learning: grow the training set by one sample chosen
            ## by the strategy selected via ibootstrap
            if xdatacls.itestmode == 0:
                if xdatacls.ibootstrap == 0:  ## random selection
                    if icandidate_w >= 0:
                        icandidate_w = np.random.randint(mtest, size=1)
                        icandidate_w = xdatacls.itest[icandidate_w]
                        xselector[icandidate_w] = 1
                        ## xselector[icandidate_b]=0     ## delete the best
                elif xdatacls.ibootstrap == 1:  ## worst confidence
                    if icandidate_w >= 0:
                        xselector[icandidate_w] = 1
                        ## xselector[icandidate_b]=0     ## delete the best
                elif xdatacls.ibootstrap == 2:  ## best confidence
                    if icandidate_b >= 0:
                        xselector[icandidate_b] = 1
                elif xdatacls.ibootstrap == 3:  ## worst+random
                    if icandidate_w >= 0:
                        pselect = np.random.rand()
                        if pselect < 0.5:
                            icandidate_w = np.random.randint(mtest)
                            icandidate_w = xdatacls.itest[icandidate_w]
                        xselector[icandidate_w] = 1
                        ## xselector[icandidate_b]=0     ## delete the best
            elif xdatacls.itestmode == 1:  ## n-fold cross-validation
                ## !!! Emre !!!
                xselector = np.floor(np.random.random(ndata) * nfold0)
                xselector = xselector - (xselector == nfold0)

            ## if xdatacls.itestmode==1:  ## n-fold crossvalidation
            ##   xselector=np.random.randint(nfold0, size=ndata)
            ## elif xdatacls.itestmode==2:  ## random subset
            ##   xselector=1*(np.random.rand(ndata)<float(plist[ipar])/100)
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
## for test only
            elif xdatacls.itestmode == -1:
                for i in range(ndata):
                    xselector[i] = i % nfold0
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
##        xselector_row=np.floor(nfold0*np.random.rand(nrow))

            for ifold in range(nfold0):

                xdatacls.split_train_test(xselector, ifold)
                mtest = len(xdatacls.itest)
                if mtest <= 0:
                    print('!!!!!!!')
                    break

                print('mtest:', mtest, 'mtrain:', len(xdatacls.itrain))

                xdatacls.mvm_datasplit()

                # sparse matrices of ranks-row_avarage-col_average+total_avarege
                xdatacls.xranges_rel=mvm_ranges(xdatacls.xdata_tra,xdatacls.nrow, \
                                             params)
                xdatacls.xranges_rel_test=mvm_ranges(xdatacls.xdata_tes, \
                                                  xdatacls.nrow,params)
                ## mvm_loadmatrix(xdatacls,isubset_tra,params)
                ## category-dependent preprocessing of the target values
                if xdatacls.category == 0:
                    mvm_glm(xdatacls, params)
                    mvm_ygrid(xdatacls, params)
                elif xdatacls.category == 1:
                    mvm_largest_category(xdatacls)
                elif xdatacls.category == 2:
                    mvm_largest_category(xdatacls)

        # validation to choose the best parameters
                print('Validation')
                xdatacls.set_validation()
                params.validation.rkernel = xdatacls.XKernel[0].title
                if params.validation.rkernel in xdatacls.dkernels:
                    kernbest = xdatacls.dkernels[
                        params.validation.rkernel].kernel_params
                else:
                    kernbest = xdatacls.XKernel[0].kernel_params

                ## either cross-validate or reuse the preset penalty/kernel
                if params.validation.ivalid == 1:
                    best_param = mvm_validation(xdatacls, params)
                else:
                    best_param = cls_empty_class()
                    best_param.c = xdatacls.penalty.c
                    best_param.d = xdatacls.penalty.d
                    best_param.par1 = kernbest.ipar1
                    best_param.par2 = kernbest.ipar2

                xdatacls.penalty.c = best_param.c
                xdatacls.penalty.d = best_param.d
                kernbest.ipar1 = best_param.par1
                kernbest.ipar2 = best_param.par2

                print('Parameters:',xdatacls.penalty.c,xdatacls.penalty.d, \
                      kernbest.ipar1,kernbest.ipar2)

                print('Best parameters found by validation')
                xbest_param[irepeat, ifold, 0] = best_param.c
                xbest_param[irepeat, ifold, 1] = best_param.d
                xbest_param[irepeat, ifold, 2] = best_param.par1
                xbest_param[irepeat, ifold, 3] = best_param.par2

                # training with the best parameters
                print('training')

                time0 = time.time()
                cOptDual = xdatacls.mvm_train(params)
                print('Training time:', time.time() - time0)

                # cls transfers the dual variables to the test procedure
                # compute test

                # check the train accuracy
                print('test on training')

                # $$$ # counts the proportion the ones predicted correctly
                # $$$ # ######################################
                # $$$     deval=col_eval(xdatacls.ieval_type,nrow,isubset_tra, \
                # $$$                      xranges_tra,Zrow)
                # $$$     xresulttr(irepeat,ifold)=deval
                # ######################################
                # check the test accuracy
                print('test on test')
                time0 = time.time()
                cPredict = xdatacls.mvm_test(cOptDual.alpha, params)
                print('Test time:', time.time() - time0)

                # counts the proportion the ones predicted correctly
                # ####################################
                ## mvm_eval also returns the worst/best-confidence candidates
                ## used by the active-learning selection above
                time0 = time.time()
                (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                                  xdatacls.nrow,xdatacls,cPredict.Zrow)
                print('Evaluation time:', time.time() - time0)

                if xdatacls.ieval_type == 0:
                    xresultte[irepeat, ifold, 0] = cEval.accuracy
                    ## prediction of effective categories
                    ## part_accuracy=float(np.sum(np.diag(cEval.xconfusion)[1:]))/ \
                    ##           np.sum(cEval.xconfusion[1:,1:])
                    ## xresultte[irepeat,ifold,1]=part_accuracy
                    xresultte[irepeat, ifold, 1] = cEval.precision
                    xresultte[irepeat, ifold, 2] = cEval.recall
                    xresultte[irepeat, ifold, 3] = cEval.f1
                elif xdatacls.ieval_type == 10:
                    xresultte[irepeat, ifold, 0] = cEval.accuracy
                    xconfusion3[irepeat, ifold] = cEval.xconfusion3
                else:
                    xresultte[irepeat, ifold, 0] = cEval.deval
                ## map candidate indices back to absolute sample indices
                icandidate_w = xdatacls.itest[icandidate_w]
                icandidate_b = xdatacls.itest[icandidate_b]
                ireport += 1

                ## print(cEval.xconfusion)
                if xdatacls.ieval_type != 10:
                    for xconfrow in cEval.xconfusion:
                        for ditem in xconfrow:
                            print('%7.0f' % ditem, end='')
                        print()
                    print()
                else:
                    ## print each confusion table normalized to percentages
                    for xtable in cEval.xconfusion3:
                        xsum = np.sum(xtable)
                        if xsum == 0:
                            xsum = 1
                        xtable = 100 * xtable / xsum
                        for xconfrow in xtable:
                            for ditem in xconfrow:
                                print('%8.4f' % ditem, end='')
                            print()
                        print()
                    print()

        # ####################################
                print('*** ipar, repeatation, fold ***')
                print(ipar, irepeat, ifold)

                if xdatacls.itestmode == 1:  ## n-fold crossvalidation
                    print('Result in one fold and one repeatation')
                    ## print('Accuracy on train')
                    ## print(xresulttr[irepeat,ifold])
                    print('Accuracy on test')
                    if xdatacls.ieval_type == 0:
                        print(xresultte[irepeat, ifold])
                    else:
                        print(xresultte[irepeat, ifold, 0])

            print('Result in one repetation')
            print('Mean and std of the accuracy on test')
            ## NOTE(review): both branches print the same expression; the
            ## split mirrors the ieval_type handling above and is kept as-is
            if xdatacls.ieval_type == 0:
                print(np.mean(xresultte[irepeat, :, 0]),
                      np.std(xresultte[irepeat, :, 0]))
            else:
                print(np.mean(xresultte[irepeat, :, 0]),
                      np.std(xresultte[irepeat, :, 0]))

            sys.stdout.flush()

            if xdatacls.itestmode == 0:  ## active learning (itestmode 0, cf. nfold0 setup above)
                np.savetxt(fname,
                           xresultte[:ireport, 0, :],
                           delimiter=',',
                           fmt='%6.4f')
            else:
                if xdatacls.ieval_type == 0:
                    np.savetxt(fname,
                               xresultte[:ireport, :, :],
                               delimiter=',',
                               fmt='%6.4f')
                else:
                    np.savetxt(fname,
                               xresultte[:ireport, :, 0],
                               delimiter=',',
                               fmt='%6.4f')

        print('***** Overall result ****')
        print('Mean and std of the accuracy on test + error')
        ## NOTE(review): identical branches again — kept for symmetry
        if xdatacls.ieval_type == 0:
            print(np.mean(xresultte[:, :, 0]), np.std(xresultte[:, :, 0]))
        else:
            print(np.mean(xresultte[:, :, 0]), np.std(xresultte[:, :, 0]))

#     if xdatacls.ieval_type==10:
#       confusion_latex(xconfusion3,lfiles)

        print('Average best parameters')
        ##  sfield=dir(best_param)
        xlabels = ('c', 'd', 'par1', 'par2')
        for i in range(nparam):
            ##    print(sfield[i])
            print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                    '(',np.std(xbest_param[:,:,i]),')')

    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print('Bye')

    return
예제 #12
0
파일: mmr_main.py 프로젝트: ipa-nhg/kukadu
def mmr_main(iworkmode):
  """Driver: MMR training/evaluation on an image-annotation data set.

  Loads one data set (corel5k by default) with the feature views listed
  in ``lfeatures``, then runs ``nrepeat`` repetitions of ``nfold0``-fold
  training: per fold it cross-validates kernel/penalty parameters, trains
  with the best ones, and reports train/test accuracy plus
  precision/recall/F1 via ``mmr_report``.

  ``iworkmode`` is unused in the visible body; kept for interface
  compatibility with the other ``*_main`` drivers.
  """

  params=mmr_setparams.cls_params()



## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  list_features=["annot","DenseHue","DenseHueV3H1", \
                "DenseSift","DenseSiftV3H1","Gist", \
                "HarrisHue","HarrisHueV3H1","HarrisSift", \
                "HarrisSiftV3H1","Hsv","HsvV3H1","Lab", \
                "LabV3H1","Rgb","RgbV3H1"]

  ## data files in the corresponding directories
  datadirs=['corel5k','espgame','iaprtc12','mirflickr','pascal07']

  ## Example lfeatures=[4,8] means we selected the features:
  ##                                "DenseSiftV3H1" and  "HarrisSift"
  lfeatures=[4]
  params.ninputview=len(lfeatures)
  idata=0       ## process corel5k data set

  xdatacls=mmr_mmr_cls.cls_mmr(params.ninputview)
  nfold=xdatacls.nfold
  nrepeat=xdatacls.nrepeat
  print('Xbias:',xdatacls.xbias)

  cdata_store=mmr_load_data.cls_data_load()
  cdata_store.load_data(xdatacls,idata,lfeatures)
  mdata=xdatacls.mdata

  ## initializing the array collecting the results
  nscore=4
  nipar=1
  if xdatacls.crossval_mode==0:   ## random
    nfold0=nfold
    xresult_test=np.zeros((nipar,nrepeat,nfold0))
    xresult_train=np.zeros((nipar,nrepeat,nfold0))
    xpr=np.zeros((nipar,nrepeat,nfold0,nscore))
  elif xdatacls.crossval_mode==1:  ## predefined trianing and test
    nrepeat=1
    nfold0=1
    xresult_test=np.zeros((nipar,nrepeat,nfold0))
    xresult_train=np.zeros((nipar,nrepeat,nfold0))
    xpr=np.zeros((nipar,nrepeat,nfold0,nscore))


  ## -----------------------------------------------
  print('Output kernel type: ',xdatacls.YKernel.kernel_params.kernel_type)
  for i in range(params.ninputview):
    print(i,'Input kernel type: ',xdatacls.XKernel[i].kernel_params.kernel_type)
  ## -------------------------------------------------

  ## NOTE(review): xcross is allocated but not used in the visible body
  xcross=np.zeros((mdata,mdata))

  ## slots: [0] validation, [1] training, [2] test wall-clock times
  xtime=np.zeros(5)
## ############################################################
  nparam=4    ## C,D,par1,par2
  xbest_param=np.zeros((nrepeat,nfold0,nparam))

  for iipar in range(nipar):

    print('===================================================')
    for irepeat in range(nrepeat):

      xdatacls.prepare_repetition_training(nfold0)

      for ifold in range(nfold0):

        xdatacls.prepare_fold_training(ifold)

        ## validation to choose the best parameters
        ## NOTE(review): time.clock() was removed in Python 3.8;
        ## time.perf_counter() is the modern replacement
        print('Validation')
        t0=time.clock()
        xdatacls.set_validation()
        cvalidation=mmr_validation_cls.cls_mmr_validation()
        cvalidation.validation_rkernel=xdatacls.XKernel[0].title
        best_param=cvalidation.mmr_validation(xdatacls)

        xtime[0]=time.clock()-t0

        print('Best parameters found by validation')
        print('c: ',best_param.c)
        print('d: ',best_param.d)
        print('par1: ',best_param.par1)
        print('par2: ',best_param.par2)
        xbest_param[irepeat,ifold,0]=best_param.c
        xbest_param[irepeat,ifold,1]=best_param.d
        xbest_param[irepeat,ifold,2]=best_param.par1
        xbest_param[irepeat,ifold,3]=best_param.par2

        xdatacls.compute_kernels()
        xdatacls.Y0=xdatacls.YKernel.get_train(xdatacls.itrain)   ## candidates

  ## training with the best parameters
        print('Training')

        print(xdatacls.YKernel.kernel_params.kernel_type, \
              xdatacls.YKernel.kernel_params.ipar1, \
              xdatacls.YKernel.kernel_params.ipar2)
        for iview in range(xdatacls.ninputview):
          print(xdatacls.XKernel[iview].kernel_params.kernel_type, \
                xdatacls.XKernel[iview].kernel_params.ipar1, \
                xdatacls.XKernel[iview].kernel_params.ipar2)


        t0=time.clock()
        cOptDual=xdatacls.mmr_train()
        xtime[1]=time.clock()-t0
  ## cls transfers the dual variables to the test procedure
  ## compute tests
  ## check the train accuracy
        print('Test')
        cPredictTra=xdatacls.mmr_test(cOptDual,itraindata=0)
  ## counts the proportion the ones predicted correctly
  ## ######################################
        if xdatacls.itestmode==2:
          print('Test knn')
          ypred=inverse_knn(xdatacls.YKernel.get_Y0(xdatacls.itrain), \
                            cPredictTra)
        else:
          ypred=cPredictTra.zPred
        cEvaluationTra= \
              mmr_eval_binvector(xdatacls.YKernel.get_train(xdatacls.itrain), \
                                 ypred)
        xresult_train[iipar,irepeat,ifold]=cEvaluationTra.accuracy
        print('>>>>>>>>>>>\n',cEvaluationTra.confusion)
  ## ######################################
  ## check the test accuracy
        t0=time.clock()
        cPredictTes= xdatacls.mmr_test(cOptDual,itraindata=1)
  ## counts the proportion the ones predicted correctly
        if xdatacls.itestmode==2:
          ypred=inverse_knn(xdatacls.YKernel.get_Y0(xdatacls.itrain), \
                            cPredictTes)
        else:
          ypred=cPredictTes.zPred
        ## cEvaluationTes=mmr_eval_binvector(cData.YTest,cPredictTes.zPred)
        cEvaluationTes= \
              mmr_eval_binvector(xdatacls.YKernel.get_test(xdatacls.itest), \
                                 ypred)

        xtime[2]=time.clock()-t0
        xresult_test[iipar,irepeat,ifold]=cEvaluationTes.accuracy

        xpr[iipar,irepeat,ifold,0]=cEvaluationTes.precision
        xpr[iipar,irepeat,ifold,1]=cEvaluationTes.recall
        xpr[iipar,irepeat,ifold,2]=cEvaluationTes.f1
        xpr[iipar,irepeat,ifold,3]=cEvaluationTes.accuracy

        print(cEvaluationTes.confusion)
        print(cEvaluationTes.classconfusion)
        ## NOTE(review): the bare except initializes the accumulator the
        ## first time xclassconfusion is referenced (NameError); catching
        ## NameError explicitly would be safer but is a behavior change
        try:
          xclassconfusion+=cEvaluationTes.classconfusion
        except:
          (n,n)=cEvaluationTes.classconfusion.shape
          xclassconfusion=np.zeros((n,n))
          xclassconfusion+=cEvaluationTes.classconfusion
        ## mmr_eval_label(ZW,iPre,YTesN,Y0,kit_data,itest,params)

  ## ####################################
        print('Parameter:',iipar,'Repetition: ',irepeat, \
              'Fold: ',ifold)
        mmr_report.mmr_report('Result on one fold',
                   xresult_train[iipar,irepeat,ifold], \
                   xresult_test[iipar,irepeat,ifold], \
                   xpr[iipar,irepeat,ifold,:])
        print(np.sum(xpr[iipar,irepeat,:ifold+1,:],0)/(ifold+1))

      mmr_report.mmr_report('Result on one repetition',
                 np.mean(xresult_train[iipar,irepeat,:]), \
                 np.mean(xresult_test[iipar,irepeat,:]), \
                 np.mean(xpr[iipar,irepeat,:,:],0))

    mmr_report.mmr_report('Result on all repetitions @@@@@@@',
               np.mean(xresult_train[iipar,:,:].flatten()), \
               np.mean(xresult_test[iipar,:,:].flatten()), \
               np.mean(np.mean(xpr[iipar,:,:,:],0),0))



    print('Average best parameters')
    xlabels=('c','d','par1','par2')
    for i in range(nparam):
      print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
              '(',np.std(xbest_param[:,:,i]),')')

    print('xtime:',xtime)
    sys.stdout.flush()

  print('Bye')

  return
예제 #13
0
def mmr_main(iworkmode):
    """Driver: per-object/per-feature MMR training with final full fit.

    For each object and feature view it builds a fresh ``cls_mmr``,
    loads data via ``vision_load_data``, runs repeated (cross-)validated
    training and evaluation, collects precision/recall/F1 per feature,
    prints a LaTeX results table, then re-trains once on ALL data with
    the averaged best parameters and saves the dual variables and kernel
    parameters to the paths provided by the data store.

    ``iworkmode`` is unused in the visible body; kept for interface
    compatibility with the other ``*_main`` drivers.
    """

    params = mmr_setparams.cls_params()
    np.set_printoptions(precision=4)

    dresult = {}
    ## ---------------------------------------------
    nview = 1
    nobject = 1
    params.ninputview = nview

    lresult = []

    for iobject in range(nobject):

        for ifeature in range(nview):

            cMMR = mmr_mmr_cls.cls_mmr(params.ninputview)
            nfold = cMMR.nfold
            nrepeat = cMMR.nrepeat
            ## cMMR.xbias=-0.06  ## 4 categories
            cMMR.xbias = 0.0
            ## cMMR.xbias=0.1-ifeature*0.01
            print('Xbias:', cMMR.xbias)

            ## result arrays: accuracy per (param,repeat,fold) and
            ## precision/recall/f1/accuracy scores
            nscore = 4
            nipar = 1
            if cMMR.crossval_mode == 0:  ## random
                nfold0 = nfold
                xresult_test = np.zeros((nipar, nrepeat, nfold0))
                xresult_train = np.zeros((nipar, nrepeat, nfold0))
                xpr = np.zeros((nipar, nrepeat, nfold0, nscore))
            elif cMMR.crossval_mode == 1:  ## predefined trianing and test
                nrepeat = 1
                nfold0 = 1
                xresult_test = np.zeros((nipar, nrepeat, nfold0))
                xresult_train = np.zeros((nipar, nrepeat, nfold0))
                xpr = np.zeros((nipar, nrepeat, nfold0, nscore))

        ## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

        ## cMMR=mmr_mmr_cls.cls_mmr(params.ninputview)

            cdata_store = vision_load_data.cls_label_files()
            cdata_store.load_mmr(cMMR)
            mdata = cMMR.mdata

            ## -----------------------------------------------
            print('Output kernel type: ',
                  cMMR.YKernel.kernel_params.kernel_type)
            for i in range(params.ninputview):
                print(i, 'Input kernel type: ',
                      cMMR.XKernel[i].kernel_params.kernel_type)
            ## -------------------------------------------------

            ## NOTE(review): xcross is allocated but not used in the visible body
            xcross = np.zeros((mdata, mdata))

            ## slots: [0] validation, [1] training, [2] test wall-clock times
            xtime = np.zeros(5)
            ## ############################################################
            nparam = 4  ## C,D,par1,par2
            xbest_param = np.zeros((nrepeat, nfold0, nparam))

            for iipar in range(nipar):

                print('===================================================')
                for irepeat in range(nrepeat):
                    ## split data into training and test
                    if cMMR.crossval_mode == 0:  ## random selection
                        ## assign fold labels round-robin, then shuffle
                        xselector = np.zeros(mdata)
                        ifold = 0
                        for i in range(mdata):
                            xselector[i] = ifold
                            ifold += 1
                            if ifold >= nfold0:
                                ifold = 0
                        np.random.shuffle(xselector)
                        ## xselector=np.floor(np.random.random(mdata)*nfold0)
                        ## xselector=xselector-(xselector==nfold0)
                    elif cMMR.crossval_mode == 1:  ## preddefined training and test
                        xselector = np.zeros(mdata)
                        xselector[cMMR.ifixtrain] = 1

                    for ifold in range(nfold0):
                        cMMR.split_train_test(xselector, ifold)

                        ## validation to choose the best parameters
                        ## NOTE(review): time.clock() was removed in
                        ## Python 3.8; time.perf_counter() replaces it
                        print('Validation')
                        t0 = time.clock()
                        ## select the kernel to be validated
                        cMMR.set_validation()

                        cvalidation = mmr_validation_cls.cls_mmr_validation()
                        cvalidation.validation_rkernel = cMMR.XKernel[0].title
                        best_param = cvalidation.mmr_validation(cMMR)

                        xtime[0] = time.clock() - t0

                        print('Best parameters found by validation')
                        print('c: ', best_param.c)
                        print('d: ', best_param.d)
                        print('par1: ', best_param.par1)
                        print('par2: ', best_param.par2)
                        xbest_param[irepeat, ifold, 0] = best_param.c
                        xbest_param[irepeat, ifold, 1] = best_param.d
                        xbest_param[irepeat, ifold, 2] = best_param.par1
                        xbest_param[irepeat, ifold, 3] = best_param.par2

                        cMMR.compute_kernels()
                        cMMR.Y0 = cMMR.YKernel.get_train(
                            cMMR.itrain)  ## candidates

                        ## training with the best parameters
                        print('Training')

                        print(cMMR.YKernel.kernel_params.kernel_type, \
                              cMMR.YKernel.kernel_params.ipar1, \
                              cMMR.YKernel.kernel_params.ipar2)
                        for iview in range(cMMR.ninputview):
                            print(cMMR.XKernel[iview].kernel_params.kernel_type, \
                                  cMMR.XKernel[iview].kernel_params.ipar1, \
                                  cMMR.XKernel[iview].kernel_params.ipar2)

                        t0 = time.clock()
                        cOptDual = cMMR.mmr_train()
                        xtime[1] = time.clock() - t0
                        ## cls transfers the dual variables to the test procedure
                        ## compute tests
                        ## check the train accuracy
                        print('Test')
                        cPredictTra = cMMR.mmr_test(cOptDual, itraindata=0)
                        ## counts the proportion the ones predicted correctly
                        ## ######################################
                        if cMMR.itestmode == 2:
                            print('Test knn')
                            ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                              cPredictTra)
                        else:
                            ypred = cPredictTra.zPred
                        cEvaluationTra= \
                              mmr_eval_binvector(cMMR.YKernel.get_train(cMMR.itrain), \
                                                 ypred)
                        xresult_train[iipar, irepeat,
                                      ifold] = cEvaluationTra.accuracy
                        print('>>>>>>>>>>>\n', cEvaluationTra.confusion)
                        ## ######################################
                        ## check the test accuracy
                        t0 = time.clock()
                        cPredictTes = cMMR.mmr_test(cOptDual, itraindata=1)
                        ## counts the proportion the ones predicted correctly
                        if cMMR.itestmode == 2:
                            ypred=inverse_knn(cMMR.YKernel.get_Y0(cMMR.itrain), \
                                              cPredictTes)
                        else:
                            ypred = cPredictTes.zPred
                        ## cEvaluationTes=mmr_eval_binvector(cData.YTest,cPredictTes.zPred)
                        cEvaluationTes= \
                              mmr_eval_binvector(cMMR.YKernel.get_test(cMMR.itest), \
                                                 ypred)

                        xtime[2] = time.clock() - t0
                        xresult_test[iipar, irepeat,
                                     ifold] = cEvaluationTes.accuracy

                        xpr[iipar, irepeat, ifold,
                            0] = cEvaluationTes.precision
                        xpr[iipar, irepeat, ifold, 1] = cEvaluationTes.recall
                        xpr[iipar, irepeat, ifold, 2] = cEvaluationTes.f1
                        xpr[iipar, irepeat, ifold, 3] = cEvaluationTes.accuracy

                        print(cEvaluationTes.confusion)
                        print(cEvaluationTes.classconfusion)
                        ## NOTE(review): the bare except initializes the
                        ## accumulator on first reference (NameError);
                        ## catching NameError explicitly would be safer
                        try:
                            xclassconfusion += cEvaluationTes.classconfusion
                        except:
                            (n, n) = cEvaluationTes.classconfusion.shape
                            xclassconfusion = np.zeros((n, n))
                            xclassconfusion += cEvaluationTes.classconfusion
                        ## mmr_eval_label(ZW,iPre,YTesN,Y0,kit_data,itest,params)

            ## ####################################
                        print('Parameter:',iipar,'Repetition: ',irepeat, \
                              'Fold: ',ifold)
                        mmr_report('Result on one fold',
                                   xresult_train[iipar,irepeat,ifold], \
                                   xresult_test[iipar,irepeat,ifold], \
                                   xpr[iipar,irepeat,ifold,:])
                        print(
                            np.sum(xpr[iipar, irepeat, :ifold + 1, :], 0) /
                            (ifold + 1))

                    mmr_report('Result on one repetition',
                               np.mean(xresult_train[iipar,irepeat,:]), \
                               np.mean(xresult_test[iipar,irepeat,:]), \
                               np.mean(xpr[iipar,irepeat,:,:],0))

                mmr_report('Result on all repetitions @@@@@@@',
                           np.mean(xresult_train[iipar,:,:].flatten()), \
                           np.mean(xresult_test[iipar,:,:].flatten()), \
                           np.mean(np.mean(xpr[iipar,:,:,:],0),0))

                print('Average best parameters')
                ##  sfield=dir(best_param)
                xlabels = ('c', 'd', 'par1', 'par2')
                for i in range(nparam):
                    ##    print(sfield[i])
                    print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                          '(',np.std(xbest_param[:,:,i]),')')

                print('xtime:', xtime)
                sys.stdout.flush()

                dresult[ifeature] = (cMMR.xbias,
                                     np.mean(np.mean(xpr[iipar, :, :, :], 0),
                                             0))

        for sfeature_type, tresult in dresult.items():
            ## xhead=cMMR.xbias
            xhead = ''
            lresult.append((xhead, tresult))

        ## lresult.sort()
        ## for litem in lresult:
        ##   print(litem)

        ## emit the collected scores as a LaTeX table
        print('\\begin{tabular}{l|rrr}')
        print('& \\multicolumn{3}{c}{' + 'Objects' + '} \\\\')
        print('Feature type & Precision & Recall & F1 \\\\ \\hline')
        for litem in lresult:
            print(litem[0],' & ','%6.4f'%litem[1][1][0], \
                  ' & ','%6.4f'%litem[1][1][1],' & ','%6.4f'%litem[1][1][2],' \\\\')
        print('\\end{tabular}')

        ## print('\\begin{tabular}{l|rrr}')
        ## print('& \\multicolumn{3}{c}{'+'Objects'+'} \\\\')
        ## print('Feature & xbias & Precision & Recall & F1 \\\\ \\hline')
        ## for litem in lresult:
        ##   print(litem[0],' & ','%6.4f'%litem[1][0],' & ','%6.4f'%litem[1][1][0], \
        ##         ' & ','%6.4f'%litem[1][1][1],' & ','%6.4f'%litem[1][1][2],' \\\\')
        ## print('\\end{tabular}')

    ## ##########################################################
    ## !!!! It saves the optimal dual variables, and optimal, crossvalidated,
    ##  kernel parameters into files given in vision_load_data.

    ## prepare full training with the best parameters
    ## NOTE(review): this section relies on cMMR, mdata, cdata_store,
    ## xbest_param and xclassconfusion leaking out of the loops above —
    ## it fails if nobject/nview were 0

    ifold = 0
    xselector = np.ones(mdata)
    cMMR.split_train_test(xselector, ifold)
    best_param = np.array(
        [np.mean(xbest_param[:, :, i]) for i in range(nparam)])
    cMMR.penalty.c = best_param[0]
    cMMR.penalty.d = best_param[1]
    cMMR.XKernel[0].kernel_params.ipar1 = best_param[2]
    cMMR.XKernel[0].kernel_params.ipar2 = best_param[3]

    cMMR.compute_kernels()
    cMMR.Y0 = cMMR.YKernel.get_train(cMMR.itrain)  ## candidates
    ## training with the best parameters
    print('Full training')
    cOptDual = cMMR.mmr_train()

    np.savetxt(cdata_store.sbasedir+cdata_store.dual_params,cMMR.dual.alpha, \
               fmt='%9.4f')
    np.savetxt(cdata_store.sbasedir+cdata_store.kernel_params,best_param[2:], \
               fmt='%9.4f')

    print(xclassconfusion)

    print('Bye')

    return
예제 #14
0
파일: mmr_main.py 프로젝트: mzillich/kukadu
def mmr_main(iworkmode):
    """Run the full MMR experiment pipeline on an image-annotation data set.

    For each trade-off setting (``nipar``), repetition and fold this
    function: validates to pick the best ``(c, d, par1, par2)``, trains
    the MMR solver with them, evaluates on the training and test splits,
    accumulates precision/recall/F1/accuracy and a class-confusion matrix,
    and prints per-fold, per-repetition and overall reports.

    Parameters
    ----------
    iworkmode : unused here; kept for interface compatibility with callers.
    """

    params = mmr_setparams.cls_params()

    ## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

    ## available feature kernels; lfeatures below indexes into this list
    list_features=["annot","DenseHue","DenseHueV3H1", \
                  "DenseSift","DenseSiftV3H1","Gist", \
                  "HarrisHue","HarrisHueV3H1","HarrisSift", \
                  "HarrisSiftV3H1","Hsv","HsvV3H1","Lab", \
                  "LabV3H1","Rgb","RgbV3H1"]

    ## data files in the corresponding directories
    datadirs = ['corel5k', 'espgame', 'iaprtc12', 'mirflickr', 'pascal07']

    ## Example lfeatures=[4,8] means we selected the features:
    ##                                "DenseSiftV3H1" and  "HarrisSift"
    lfeatures = [4]
    params.ninputview = len(lfeatures)
    idata = 0  ## process corel5k data set

    xdatacls = mmr_mmr_cls.cls_mmr(params.ninputview)
    nfold = xdatacls.nfold
    nrepeat = xdatacls.nrepeat
    print('Xbias:', xdatacls.xbias)

    cdata_store = mmr_load_data.cls_data_load()
    cdata_store.load_data(xdatacls, idata, lfeatures)
    mdata = xdatacls.mdata

    ## initializing the array collecting the results
    nscore = 4   ## precision, recall, f1, accuracy
    nipar = 1
    if xdatacls.crossval_mode == 0:  ## random folds
        nfold0 = nfold
    elif xdatacls.crossval_mode == 1:  ## predefined training and test
        nrepeat = 1
        nfold0 = 1
    else:
        ## fail early with a clear message instead of a NameError on
        ## nfold0 further below
        raise ValueError('Unknown crossval_mode: %r' % xdatacls.crossval_mode)
    xresult_test = np.zeros((nipar, nrepeat, nfold0))
    xresult_train = np.zeros((nipar, nrepeat, nfold0))
    xpr = np.zeros((nipar, nrepeat, nfold0, nscore))

    ## -----------------------------------------------
    print('Output kernel type: ', xdatacls.YKernel.kernel_params.kernel_type)
    for i in range(params.ninputview):
        print(i, 'Input kernel type: ',
              xdatacls.XKernel[i].kernel_params.kernel_type)
    ## -------------------------------------------------

    ## wall-clock timings: [validation, training, test, unused, unused]
    ## NOTE: time.clock() (used originally) was removed in Python 3.8;
    ## time.perf_counter() is the documented replacement.
    xtime = np.zeros(5)
    ## ############################################################
    nparam = 4  ## C,D,par1,par2
    xbest_param = np.zeros((nrepeat, nfold0, nparam))

    for iipar in range(nipar):

        print('===================================================')
        for irepeat in range(nrepeat):

            xdatacls.prepare_repetition_training(nfold0)

            for ifold in range(nfold0):

                xdatacls.prepare_fold_training(ifold)

                ## validation to choose the best parameters
                print('Validation')
                t0 = time.perf_counter()
                xdatacls.set_validation()
                cvalidation = mmr_validation_cls.cls_mmr_validation()
                cvalidation.validation_rkernel = xdatacls.XKernel[0].title
                best_param = cvalidation.mmr_validation(xdatacls)

                xtime[0] = time.perf_counter() - t0

                print('Best parameters found by validation')
                print('c: ', best_param.c)
                print('d: ', best_param.d)
                print('par1: ', best_param.par1)
                print('par2: ', best_param.par2)
                xbest_param[irepeat, ifold, 0] = best_param.c
                xbest_param[irepeat, ifold, 1] = best_param.d
                xbest_param[irepeat, ifold, 2] = best_param.par1
                xbest_param[irepeat, ifold, 3] = best_param.par2

                xdatacls.compute_kernels()
                xdatacls.Y0 = xdatacls.YKernel.get_train(
                    xdatacls.itrain)  ## candidates

                ## training with the best parameters
                print('Training')

                print(xdatacls.YKernel.kernel_params.kernel_type, \
                      xdatacls.YKernel.kernel_params.ipar1, \
                      xdatacls.YKernel.kernel_params.ipar2)
                for iview in range(xdatacls.ninputview):
                    print(xdatacls.XKernel[iview].kernel_params.kernel_type, \
                          xdatacls.XKernel[iview].kernel_params.ipar1, \
                          xdatacls.XKernel[iview].kernel_params.ipar2)

                t0 = time.perf_counter()
                cOptDual = xdatacls.mmr_train()
                xtime[1] = time.perf_counter() - t0
                ## cls transfers the dual variables to the test procedure
                ## compute tests
                ## check the train accuracy
                print('Test')
                cPredictTra = xdatacls.mmr_test(cOptDual, itraindata=0)
                ## counts the proportion the ones predicted correctly
                ## ######################################
                if xdatacls.itestmode == 2:
                    print('Test knn')
                    ypred=inverse_knn(xdatacls.YKernel.get_Y0(xdatacls.itrain), \
                                      cPredictTra)
                else:
                    ypred = cPredictTra.zPred
                cEvaluationTra= \
                      mmr_eval_binvector(xdatacls.YKernel.get_train(xdatacls.itrain), \
                                         ypred)
                xresult_train[iipar, irepeat, ifold] = cEvaluationTra.accuracy
                print('>>>>>>>>>>>\n', cEvaluationTra.confusion)
                ## ######################################
                ## check the test accuracy
                t0 = time.perf_counter()
                cPredictTes = xdatacls.mmr_test(cOptDual, itraindata=1)
                ## counts the proportion the ones predicted correctly
                if xdatacls.itestmode == 2:
                    ypred=inverse_knn(xdatacls.YKernel.get_Y0(xdatacls.itrain), \
                                      cPredictTes)
                else:
                    ypred = cPredictTes.zPred
                ## cEvaluationTes=mmr_eval_binvector(cData.YTest,cPredictTes.zPred)
                cEvaluationTes= \
                      mmr_eval_binvector(xdatacls.YKernel.get_test(xdatacls.itest), \
                                         ypred)

                xtime[2] = time.perf_counter() - t0
                xresult_test[iipar, irepeat, ifold] = cEvaluationTes.accuracy

                xpr[iipar, irepeat, ifold, 0] = cEvaluationTes.precision
                xpr[iipar, irepeat, ifold, 1] = cEvaluationTes.recall
                xpr[iipar, irepeat, ifold, 2] = cEvaluationTes.f1
                xpr[iipar, irepeat, ifold, 3] = cEvaluationTes.accuracy

                print(cEvaluationTes.confusion)
                print(cEvaluationTes.classconfusion)
                ## accumulate the class-confusion matrix across folds; on
                ## the very first fold xclassconfusion does not exist yet,
                ## so create it then.  (Was a bare "except:", which also
                ## hid genuine errors such as shape mismatches.)
                try:
                    xclassconfusion += cEvaluationTes.classconfusion
                except NameError:
                    (n, n) = cEvaluationTes.classconfusion.shape
                    xclassconfusion = np.zeros((n, n))
                    xclassconfusion += cEvaluationTes.classconfusion
                ## mmr_eval_label(ZW,iPre,YTesN,Y0,kit_data,itest,params)

    ## ####################################
                print('Parameter:',iipar,'Repetition: ',irepeat, \
                      'Fold: ',ifold)
                mmr_report.mmr_report('Result on one fold',
                           xresult_train[iipar,irepeat,ifold], \
                           xresult_test[iipar,irepeat,ifold], \
                           xpr[iipar,irepeat,ifold,:])
                ## running mean of the scores over the folds seen so far
                print(
                    np.sum(xpr[iipar, irepeat, :ifold + 1, :], 0) /
                    (ifold + 1))

            mmr_report.mmr_report('Result on one repetition',
                       np.mean(xresult_train[iipar,irepeat,:]), \
                       np.mean(xresult_test[iipar,irepeat,:]), \
                       np.mean(xpr[iipar,irepeat,:,:],0))

        mmr_report.mmr_report('Result on all repetitions @@@@@@@',
                   np.mean(xresult_train[iipar,:,:].flatten()), \
                   np.mean(xresult_test[iipar,:,:].flatten()), \
                   np.mean(np.mean(xpr[iipar,:,:,:],0),0))

        print('Average best parameters')
        xlabels = ('c', 'd', 'par1', 'par2')
        for i in range(nparam):
            print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                    '(',np.std(xbest_param[:,:,i]),')')

        print('xtime:', xtime)
        sys.stdout.flush()

    print('Bye')

    return
Example #15
0
def test_mvm_main(workmode):
    """Experiment driver for the MVM solver on object-object-action
    relation tables (webrel data).

    For each parameter setting it loads a relation table, runs repeated
    validation / train / test cycles, exports predictions, prints
    confusion tables, and accumulates mean accuracy and solver time into
    a summary printed at the end.

    Parameters
    ----------
    workmode : unused here; kept for interface compatibility with callers.
    """

    params = mmr_setparams.cls_params()  # global parameter object; not read below

    xdatacls = mvm_mvm_cls.cls_mvm()  # learner + data container
    nfold = xdatacls.nfold
    if xdatacls.itestmode == 0:
        nfold0 = 1  ## active learning
    else:
        nfold0 = nfold  ## n-fold cross validation

    nparacc = 2  ## rmse, time
    npar = 1
    # summary accumulator: one row per parameter setting,
    # columns = (mean accuracy, average solver time)
    xsummary = np.zeros((npar, nparacc))

    lfilenames = ["affordances_instrument_for", "affordances_patient"]
    ifile = 1  ## file index in list above
    lfiles = [0, 1]
    lfeatures = ["PointMutualInformation", "absolute frequency"]
    ifeature = 0
    # NOTE(review): itestmode == 3 appears to mean "load everything at
    # once" (see also the nfold0 = 1 override below) -- confirm in cls_mvm
    if xdatacls.itestmode == 3:
        iloadall = 1
    else:
        iloadall = 0

    print("lfiles:", lfilenames)
    print("ifeature:", lfeatures[ifeature])

    for ipar in range(npar):

        ## possible values
        Y0 = np.array([-1, 0, 1])
        ctables = webrel_load_data.cls_label_files()  # data-loading object
        print(ctables.listcsv[ifile])
        (xdata, nrow2, ncol2, ifixtrain, ifixtest) = ctables.load_objobj_act(lfiles, ifeature)
        xdatacls.load_data(xdata, xdatacls.categorymax, int(nrow2), int(ncol2), Y0)
        xdatacls.ifixtrain = ifixtrain
        xdatacls.ifixtest = ifixtest

        # pick the result file name from the test/bootstrap mode
        # (fname is only used by the commented-out savetxt at the bottom)
        scombine = ""
        if xdatacls.itestmode == 0:
            if xdatacls.ibootstrap == 0:
                fname = "xresultte_rand" + scombine + ".csv"
            elif xdatacls.ibootstrap == 1:
                fname = "xresultte_active" + scombine + ".csv"
            elif xdatacls.ibootstrap == 2:
                fname = "xresultte_greedy" + scombine + ".csv"
            elif xdatacls.ibootstrap == 3:
                fname = "xresultte_act_rand" + scombine + ".csv"
        else:
            fname = "xresultte_ncross" + scombine + ".csv"

        # output-kernel value range; ymax/ymin/yrange are provisional here
        xdatacls.YKernel.ymax = 10
        # it will be recomputed in mvm_ranges
        xdatacls.YKernel.ymin = -10
        xdatacls.YKernel.yrange = 200  # it will be recomputed in classcol_ranges
        xdatacls.YKernel.ystep = (xdatacls.YKernel.ymax - xdatacls.YKernel.ymin) / xdatacls.YKernel.yrange
        ##  set_printoptions(precision=4)
        nparam = 4  # C,D,par1,par2
        nreport = 4  ## accuracy, precision, recall, f1

        xdatacls.prepare_repetition_folding(init_train_size=100)
        nrepeat0 = xdatacls.nrepeat0
        nfold0 = xdatacls.nfold0
        if xdatacls.itestmode == 3:
            nfold0 = 1

        creport = mmr_report_cls.cls_mmr_report()  # collects per-fold scores
        creport.create_xaprf(nrepeat=nrepeat0, nfold=nfold0, nreport=nreport)
        xbest_param = np.zeros((nrepeat0, nfold0, nparam))

        # ############################################################

        nval = max(xdatacls.YKernel.valrange) + 1
        # per-(repetition, fold, output-dimension) confusion tables
        xconfusion3 = np.zeros((nrepeat0, nfold0, xdatacls.YKernel.ndim, nval, nval))

        xsolvertime = 0.0
        ireport = 0
        for irepeat in range(nrepeat0):

            xdatacls.nfold0 = xdatacls.nfold
            xdatacls.prepare_repetition_training()

            for ifold in range(nfold0):

                xdatacls.prepare_fold_training(ifold)

                # validation to choose the best parameters
                print("Validation")
                xdatacls.set_validation()
                cvalidation = mvm_validation_cls.cls_mvm_validation()
                cvalidation.validation_rkernel = xdatacls.XKernel[0].title
                best_param = cvalidation.mvm_validation(xdatacls)

                print("Parameters:", best_param.c, best_param.d, best_param.par1, best_param.par2)

                print("Best parameters found by validation")
                xbest_param[irepeat, ifold, 0] = best_param.c
                xbest_param[irepeat, ifold, 1] = best_param.d
                xbest_param[irepeat, ifold, 2] = best_param.par1
                xbest_param[irepeat, ifold, 3] = best_param.par2

                # training with the best parameters
                print("training")

                time0 = time.time()
                cOptDual = xdatacls.mvm_train()
                xsolvertime += xdatacls.solvertime
                print("Training time:", time.time() - time0)
                sys.stdout.flush()

                # check the train accuracy
                print("test on training")

                # check the test accuracy
                print("test on test")
                time0 = time.time()

                # optionally replace the test split with the full table
                if xdatacls.ifulltest == 1:
                    xdatacls.xdata_tes = ctables.full_test()
                    xdatacls.xranges_rel_test = mvm_prepare.mvm_ranges(xdatacls.xdata_tes, xdatacls.nrow)

                cPredict = xdatacls.mvm_test()
                print("Test time:", time.time() - time0)
                sys.stdout.flush()

                filename = "predicted_missing.csv"
                ctables.export_prediction(filename, xdatacls, cPredict.Zrow)

                # counts the proportion the ones predicted correctly
                # ####################################
                time0 = time.time()
                # evaluation is only possible when the test labels are known
                if xdatacls.knowntest == 1:
                    (cEval, icandidate_w, icandidate_b) = mvm_eval(
                        xdatacls.ieval_type, xdatacls.nrow, xdatacls, cPredict.Zrow
                    )
                    print("Evaluation time:", time.time() - time0)
                    (qtest, qpred, qpred0) = makearray(xdatacls, cPredict.Zrow)

                    # ieval_type == 10 additionally carries 3-way confusion data
                    if xdatacls.ieval_type in (0, 11):
                        creport.set_xaprf(irepeat, ifold, cEval)
                    elif xdatacls.ieval_type == 10:
                        creport.set_xaprf(irepeat, ifold, cEval)
                        xconfusion3[irepeat, ifold] = cEval.xconfusion3
                    else:
                        creport.set_xaprf(irepeat, ifold, cEval)

                    ## xdatacls.icandidate_w=xdatacls.itest[icandidate_w]
                    ## xdatacls.icandidate_b=xdatacls.itest[icandidate_b]
                    ireport += 1

                    ## print(cEval.xconfusion)
                    # pretty-print the confusion table(s)
                    if xdatacls.ieval_type in (0, 11):
                        for xconfrow in cEval.xconfusion:
                            for ditem in xconfrow:
                                print("%7.0f" % ditem, end="")
                            print()
                        print()
                    elif xdatacls.ieval_type == 10:
                        for xtable in cEval.xconfusion3:
                            xsum = np.sum(xtable)
                            if xsum == 0:
                                xsum = 1  # avoid division by zero on empty tables
                            xtable = 100 * xtable / xsum
                            for xconfrow in xtable:
                                for ditem in xconfrow:
                                    print("%9.4f" % ditem, end="")
                                print()
                            print()
                        print()

                    # ####################################
                    print("*** ipar, repeatation, fold ***")
                    print(ipar, irepeat, ifold)

                    if xdatacls.itestmode == 1:  ## n-fold crossvalidation

                        creport.report_prf(
                            xmask=[irepeat, ifold],
                            stitle="Result in one fold and one repetation",
                            ssubtitle="Accuracy on test",
                        )

            if xdatacls.knowntest == 1:
                creport.report_prf(
                    xmask=[irepeat, None],
                    stitle="Result in one repetation",
                    ssubtitle="Mean and std of the accuracy on test",
                )

            sys.stdout.flush()

        if xdatacls.knowntest == 1:
            (xmean, xstd) = creport.report_prf(
                xmask=[None, None],
                stitle="***** Overall result ****",
                ssubtitle="Mean and std of the accuracy on test + error",
            )

            xsummary[ipar, 0] = xmean[0]
            xsummary[ipar, 1] = xsolvertime / (nrepeat0 * nfold0)

        if xdatacls.itestmode == 3:
            filename = "predicted_missing.csv"
            ## ctables.export_prediction(filename,xdatacls,cPredict.Zrow)

            ## (qtest,qpred,qpred0)=makearray(xdatacls,cPredict.Zrow)

        print("Average best parameters")
        xlabels = ("c", "d", "par1", "par2")
        for i in range(nparam):
            print(xlabels[i], ": ", np.mean(xbest_param[:, :, i]), "(", np.std(xbest_param[:, :, i]), ")")

    if xdatacls.knowntest == 1:
        print("$$$$$$$$$ Summary results:")
        (m, n) = xsummary.shape
        for i in range(m):
            for j in range(n):
                print("%10.4f" % xsummary[i, j], end="")
            print()

    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print("Bye")

    return
Example #16
0
def test_mvm_main(workmode):
  """Experiment driver for the MVM solver on the kingsc relation tables.

  Loads either one file (cross-validation) or a training + test file
  pair, runs repeated validation / train / test cycles, prints confusion
  tables, optionally exports the test predictions, and accumulates mean
  accuracy and solver time into a summary printed at the end.

  Parameters
  ----------
  workmode : unused here; kept for interface compatibility with callers.
  """

  params=mmr_setparams.cls_params()  # global parameter object; not read below

  xdatacls=mvm_mvm_cls.cls_mvm()  # learner + data container
  nfold=xdatacls.nfold
  if xdatacls.itestmode==0:
    nfold0=1        ## active learning
  else:
    nfold0=nfold    ## n-fold cross validation

  nparacc=2   ## rmse, time
  npar=1
  # summary accumulator: one row per parameter setting,
  # columns = (mean accuracy, average solver time)
  xsummary=np.zeros((npar,nparacc))
  
  ## ['full','full_20','full_40','full_60', \
  ##  'known','known_20','known_40','known_60']
  ifile1=0   ## file index in list known
  ifile2=0   ## file index in list full
  iknown1=1  ## known 
  iknown2=0  ## full
  iloadall=1  ## =0 one file for crossvalidation =1 two files: training + test

  print('iknown1:',iknown1,'iknown2:',iknown2)
  print('ifile1:',ifile1,'ifile2:',ifile2)
  
  for ipar in range(npar):

    ## possible values
    Y0=np.array([0,1])
    ctables=kingsc_load_data.cls_label_files()  ## data loading object
    print(ctables.listknown[ifile1])
    print(ctables.listfull[ifile2])
    if iloadall==0:   ## only one file is loaded for cross validation
      (xdata,nrow2,ncol2)=ctables.load_onefile(iknown1,ifile1) 
      xdatacls.load_data(xdata,xdatacls.categorymax, \
                       int(nrow2),int(ncol2),Y0)
    else: ## the first file gives trining the second serves as test 
      (xdata,nrow2,ncol2,ifixtrain,ifixtest)=ctables.load_twofiles( \
                          iknown1,iknown2,ifile1,ifile2)
      xdatacls.load_data(xdata,xdatacls.categorymax, \
                       int(nrow2),int(ncol2),Y0)
      xdatacls.ifixtrain=ifixtrain
      xdatacls.ifixtest=ifixtest

    # pick the result file name from the test/bootstrap mode
    # (fname is only used by the commented-out savetxt at the bottom)
    scombine=''
    if xdatacls.itestmode==0:
      if xdatacls.ibootstrap==0:
        fname='xresultte_rand'+scombine+'.csv'
      elif xdatacls.ibootstrap==1:  
        fname='xresultte_active'+scombine+'.csv'
      elif xdatacls.ibootstrap==2:  
        fname='xresultte_greedy'+scombine+'.csv'
      elif xdatacls.ibootstrap==3:  
        fname='xresultte_act_rand'+scombine+'.csv'
    else:
      fname='xresultte_ncross'+scombine+'.csv'

    # output-kernel value range; ymax/ymin/yrange are provisional here
    xdatacls.YKernel.ymax=1
    # it will be recomputed in mvm_ranges
    xdatacls.YKernel.ymin=0
    xdatacls.YKernel.yrange=100 # it will be recomputed in classcol_ranges
    xdatacls.YKernel.ystep=(xdatacls.YKernel.ymax-xdatacls.YKernel.ymin) \
                            /xdatacls.YKernel.yrange
    ##  set_printoptions(precision=4)
    nparam=4    # C,D,par1,par2
    nreport=4   ## accuracy, precision, recall, f1

    xdatacls.prepare_repetition_folding(init_train_size=100)
    nrepeat0=xdatacls.nrepeat0
    nfold0=xdatacls.nfold0

    creport=mmr_report_cls.cls_mmr_report()  ## collects per-fold scores
    creport.create_xaprf(nrepeat=nrepeat0,nfold=nfold,nreport=nreport)
    xbest_param=np.zeros((nrepeat0,nfold0,nparam))

    # ############################################################

    nval=max(xdatacls.YKernel.valrange)+1
    # per-(repetition, fold, output-dimension) confusion tables
    xconfusion3=np.zeros((nrepeat0,nfold0,xdatacls.YKernel.ndim,nval,nval))

    xsolvertime=0.0
    ireport=0
    for irepeat in range(nrepeat0):

      xdatacls.nfold0=xdatacls.nfold
      xdatacls.prepare_repetition_training()
      ## nfold0=1

      for ifold in range(nfold0):

        xdatacls.prepare_fold_training(ifold)

    # validation to choose the best parameters
        print('Validation')
        xdatacls.set_validation()
        cvalidation=mvm_validation_cls.cls_mvm_validation()
        cvalidation.validation_rkernel=xdatacls.XKernel[0].title
        best_param=cvalidation.mvm_validation(xdatacls)

        print('Parameters:',best_param.c,best_param.d, \
              best_param.par1,best_param.par2)

        print('Best parameters found by validation')
        xbest_param[irepeat,ifold,0]=best_param.c
        xbest_param[irepeat,ifold,1]=best_param.d
        xbest_param[irepeat,ifold,2]=best_param.par1
        xbest_param[irepeat,ifold,3]=best_param.par2

    # training with the best parameters
        print('training')

        time0=time.time()
        cOptDual= xdatacls.mvm_train()
        xsolvertime+=xdatacls.solvertime
        print('Training time:',time.time()-time0)
        sys.stdout.flush()

    # check the train accuracy
        print('test on training')

    # check the test accuracy
        print('test on test')
        time0=time.time()

#         xdatacls.xdata_tes=ctables.full_test()
#         xdatacls.xranges_rel_test=mvm_prepare.mvm_ranges(xdatacls.xdata_tes, \
#                                                xdatacls.nrow)
        
        cPredict=xdatacls.mvm_test()
        print('Test time:',time.time()-time0)
        sys.stdout.flush()

        ## ctables.export_prediction(cPredict.Zrow)

    # counts the proportion the ones predicted correctly
    # ####################################
        time0=time.time()
        # evaluation is only possible when the test labels are known
        if xdatacls.knowntest==1:
          (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                            xdatacls.nrow, \
                                            xdatacls,cPredict.Zrow)
          print('Evaluation time:',time.time()-time0)
          ## (qtest,qpred,qpred0)=makearray(xdatacls,cPredict.Zrow)

          # ieval_type == 10 additionally carries 3-way confusion data
          if xdatacls.ieval_type in (0,11):
            creport.set_xaprf(irepeat,ifold,cEval)
          elif xdatacls.ieval_type==10:
            creport.set_xaprf(irepeat,ifold,cEval)
            xconfusion3[irepeat,ifold]=cEval.xconfusion3
          else:
            creport.set_xaprf(irepeat,ifold,cEval)

          ## xdatacls.icandidate_w=xdatacls.itest[icandidate_w]
          ## xdatacls.icandidate_b=xdatacls.itest[icandidate_b]
          ireport+=1

          ## print(cEval.xconfusion)
          # pretty-print the confusion table(s)
          if xdatacls.ieval_type in (0,11):
            for xconfrow in cEval.xconfusion:
              for ditem in xconfrow:
                print('%7.0f'%ditem,end='')
              print()
            print()
          elif xdatacls.ieval_type==10:
            for xtable in cEval.xconfusion3:
              xsum=np.sum(xtable)
              if xsum==0:
                xsum=1  # avoid division by zero on empty tables
              xtable=100*xtable/xsum
              for xconfrow in xtable:
                for ditem in xconfrow:
                  print('%9.4f'%ditem,end='')
                print()
              print()
            print()

      # ####################################    
          print('*** ipar, repeatation, fold ***') 
          print(ipar,irepeat,ifold)
        
          if xdatacls.itestmode==1: ## n-fold crossvalidation

            creport.report_prf(xmask=[irepeat,ifold], \
                             stitle='Result in one fold and one repetation', \
                             ssubtitle='Accuracy on test')

      if xdatacls.knowntest==1:
        creport.report_prf(xmask=[irepeat,None], \
                         stitle='Result in one repetation', \
                         ssubtitle='Mean and std of the accuracy on test')

      sys.stdout.flush()


    if xdatacls.knowntest==1:
      (xmean,xstd)=creport.report_prf(xmask=[None,None], \
                     stitle='***** Overall result ****', \
                     ssubtitle='Mean and std of the accuracy on test + error')

      xsummary[ipar,0]=xmean[0]
      xsummary[ipar,1]=xsolvertime/(nrepeat0*nfold0)                          

    # in two-file mode export the predictions for the missing cells,
    # with a file name derived from the two input table names
    if iloadall==1:
      filename='predicted_missing'
      if iknown1==1:
        filename+='_'+ctables.listknown[ifile1]
      else:
        filename+='_'+ctables.listfull[ifile1]
      if iknown2==1:
        filename+='_'+ctables.listknown[ifile2]
      else:
        filename+='_'+ctables.listfull[ifile2]
      filename+='.csv'
      ctables.export_test_prediction(filename,xdatacls,cPredict.Zrow)

      ## (qtest,qpred,qpred0)=makearray(xdatacls,cPredict.Zrow)

    print('Average best parameters')
    xlabels=('c','d','par1','par2')
    for i in range(nparam):
      print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
              '(',np.std(xbest_param[:,:,i]),')')

  if xdatacls.knowntest==1:
    print('$$$$$$$$$ Summary results:')
    (m,n)=xsummary.shape
    for i in range(m):
      for j in range(n):
        print('%10.4f'%xsummary[i,j],end='')
      print()

  ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
  print('Bye')    
  
  return