Exemple #1
0
def test_mvm_main(workmode):
  """Run the full MVM train/validate/test pipeline on a random label matrix.

  Loads a randomly generated label matrix, picks kernel/penalty parameters
  by validation, then trains and tests the MVM model over nrepeat0
  repetitions x nfold0 folds, printing per-fold reports and saving test
  results to a CSV file whose name encodes the test mode and bootstrap
  strategy.

  workmode: accepted from the caller but not referenced in this body.
  """

  params=mmr_setparams.cls_params()   # created but not referenced below

  xdatacls=mvm_mvm_cls.cls_mvm()
  nfold=xdatacls.nfold
  # NOTE: nfold0 set here is only a default -- it is overwritten from
  # xdatacls.nfold0 after prepare_repetition_folding() inside the ipar loop.
  if xdatacls.itestmode==0:
    nfold0=1        ## active learning
  else:
    nfold0=nfold    ## n-fold cross validation

  nparacc=2   ## rmse, time
  npar=1      ## number of parameter settings scanned by the outer loop
  xsummary=np.zeros((npar,nparacc))

  ifile=0          # index of the label file to load
  pselect=0.05     # selection probability passed to the matrix loader
  itrates=1        # itrain flag for rmatrix.load
  print('ifile:',ifile)
  print('itrates:',itrates)
  print('pselect:',pselect)
  lfiles=[]        # file list consumed by confusion_latex when ieval_type==10

  for ipar in range(npar):

    # load the random relation matrix and hand it to the MVM object
    rmatrix=mvm_random_matrix.cls_label_files()
    (xdata,nrow2,ncol2)=rmatrix.load(ifile,pselect,itrain=itrates)
    xdatacls.load_data(xdata,xdatacls.categorymax, \
                       int(nrow2),int(ncol2),None)
    scombine=''
    # output file name encodes the test mode and bootstrap strategy
    # NOTE(review): fname stays unbound if itestmode==0 and ibootstrap is
    # outside 0..3 -- presumably that never happens; verify upstream.
    if xdatacls.itestmode==0:
      if xdatacls.ibootstrap==0:
        fname='xresultte_rand'+scombine+'.csv'
      elif xdatacls.ibootstrap==1:  
        fname='xresultte_active'+scombine+'.csv'
      elif xdatacls.ibootstrap==2:  
        fname='xresultte_greedy'+scombine+'.csv'
      elif xdatacls.ibootstrap==3:  
        fname='xresultte_act_rand'+scombine+'.csv'
    else:
      fname='xresultte_ncross'+scombine+'.csv'

    # initial output-kernel range; ystep derived from the range
    xdatacls.YKernel.ymax=1
    # it will be recomputed in mvm_ranges
    xdatacls.YKernel.ymin=-1
    xdatacls.YKernel.yrange=200 # it will be recomputed in classcol_ranges
    xdatacls.YKernel.ystep=(xdatacls.YKernel.ymax-xdatacls.YKernel.ymin) \
                            /xdatacls.YKernel.yrange
    ##  set_printoptions(precision=4)
    nparam=4    # C,D,par1,par2
    nreport=4   ## accuracy, precision, recall, f1

    # the repetition/fold counts actually used below come from here
    xdatacls.prepare_repetition_folding(init_train_size=100)
    nrepeat0=xdatacls.nrepeat0
    nfold0=xdatacls.nfold0

    # report collector sized (nrepeat0, nfold0, nreport)
    creport=mmr_report_cls.cls_mmr_report()
    creport.create_xaprf(nrepeat=nrepeat0,nfold=nfold0,nreport=nreport)
    xbest_param=np.zeros((nrepeat0,nfold0,nparam))

    # ############################################################

    # per-dimension confusion tensors, only filled when ieval_type==10
    nval=max(xdatacls.YKernel.valrange)+1
    xconfusion3=np.zeros((nrepeat0,nfold0,xdatacls.YKernel.ndim,nval,nval))

    xsolvertime=0.0   # accumulated solver time, averaged into xsummary
    ireport=0         # number of (repeat, fold) cells completed so far
    for irepeat in range(nrepeat0):

      xdatacls.prepare_repetition_training()

      for ifold in range(nfold0):

        xdatacls.prepare_fold_training(ifold)

    # validation to choose the best parameters
        print('Validation')
        xdatacls.set_validation()
        cvalidation=mvm_validation_cls.cls_mvm_validation()
        cvalidation.validation_rkernel=xdatacls.XKernel[0].title
        best_param=cvalidation.mvm_validation(xdatacls)

        print('Parameters:',best_param.c,best_param.d, \
              best_param.par1,best_param.par2)

        print('Best parameters found by validation')
        xbest_param[irepeat,ifold,0]=best_param.c
        xbest_param[irepeat,ifold,1]=best_param.d
        xbest_param[irepeat,ifold,2]=best_param.par1
        xbest_param[irepeat,ifold,3]=best_param.par2

    # training with the best parameters
        print('training')

        time0=time.time()
        cOptDual= xdatacls.mvm_train()   # return value not used below
        xsolvertime+=xdatacls.solvertime
        print('Training time:',time.time()-time0)
        sys.stdout.flush()

    # check the train accuracy
        print('test on training')

    # check the test accuracy
        print('test on test')
        time0=time.time()
        cPredict=xdatacls.mvm_test()
        print('Test time:',time.time()-time0)
        sys.stdout.flush()

    # counts the proportion the ones predicted correctly
    # ####################################
        time0=time.time()
        (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                          xdatacls.nrow,xdatacls,cPredict.Zrow)
        print('Evaluation time:',time.time()-time0)
        (qtest,qpred)=makearray(xdatacls,cPredict.Zrow)   # qtest/qpred unused here

        # record the evaluation; only ieval_type==10 additionally keeps
        # the per-dimension confusion tensor
        if xdatacls.ieval_type==0:
          creport.set_xaprf(irepeat,ifold,cEval)
        elif xdatacls.ieval_type==10:
          creport.set_xaprf(irepeat,ifold,cEval)
          xconfusion3[irepeat,ifold]=cEval.xconfusion3
        else:
          creport.set_xaprf(irepeat,ifold,cEval)

        # map candidate indexes (relative to the test subset) back to
        # global sample indexes for the next active-learning step
        xdatacls.icandidate_w=xdatacls.itest[icandidate_w]
        xdatacls.icandidate_b=xdatacls.itest[icandidate_b]
        ireport+=1

        ## print(cEval.xconfusion)
        if xdatacls.ieval_type==0:
          # raw confusion counts
          for xconfrow in cEval.xconfusion:
            for ditem in xconfrow:
              print('%7.0f'%ditem,end='')
            print()
          print()
        elif xdatacls.ieval_type==10:
          # one confusion table per output dimension, shown as percentages
          for xtable in cEval.xconfusion3:
            xsum=np.sum(xtable)
            if xsum==0:
              xsum=1    # guard against division by zero on an empty table
            xtable=100*xtable/xsum
            for xconfrow in xtable:
              for ditem in xconfrow:
                print('%9.4f'%ditem,end='')
              print()
            print()
          print()
        
    # ####################################    
        print('*** ipar, repeatation, fold ***') 
        print(ipar,irepeat,ifold)
        
        if xdatacls.itestmode==1: ## n-fold crossvalidation

          creport.report_prf(xmask=[irepeat,ifold], \
                             stitle='Result in one fold and one repetation', \
                             ssubtitle='Accuracy on test')

      creport.report_prf(xmask=[irepeat,None], \
                         stitle='Result in one repetation', \
                         ssubtitle='Mean and std of the accuracy on test')

      sys.stdout.flush()

      # dump partial results after every repetition
      if xdatacls.itestmode==0: ## active learning: one row per finished step
        np.savetxt(fname,creport.xresulttes[:ireport,0,:],delimiter=',', \
                   fmt='%6.4f')
      else:
        if xdatacls.ieval_type==0:
          np.savetxt(fname,np.squeeze(creport.xaprf),delimiter=',', \
                     fmt='%6.4f')
        else:
          np.savetxt(fname,creport.xaprf[:,:,0],delimiter=',',fmt='%6.4f')

    (xmean,xstd)=creport.report_prf(xmask=[None,None], \
                     stitle='***** Overall result ****', \
                     ssubtitle='Mean and std of the accuracy on test + error')

    # per-parameter summary: mean accuracy and mean solver time
    xsummary[ipar,0]=xmean[0]
    xsummary[ipar,1]=xsolvertime/(nrepeat0*nfold0)

    if xdatacls.ieval_type==10:
      confusion_latex(xconfusion3,lfiles)      
      
    print('Average best parameters')
    xlabels=('c','d','par1','par2')
    for i in range(nparam):
      print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
              '(',np.std(xbest_param[:,:,i]),')')

  print('$$$$$$$$$ Summary results:')
  (m,n)=xsummary.shape
  for i in range(m):
    for j in range(n):
      print('%10.4f'%xsummary[i,j],end='')
    print()

  ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
  print('Bye')    
  
  return
Exemple #2
0
def test_mvm_main(workmode):
    """Run the full MVM train/validate/test pipeline on a random label matrix.

    Loads a randomly generated label matrix, picks kernel/penalty parameters
    by validation, then trains and tests the MVM model over nrepeat0
    repetitions x nfold0 folds, printing per-fold reports and saving test
    results to a CSV file whose name encodes the test mode and bootstrap
    strategy.

    workmode: accepted from the caller but not referenced in this body.
    """

    params = mmr_setparams.cls_params()  # created but not referenced below

    xdatacls = mvm_mvm_cls.cls_mvm()
    nfold = xdatacls.nfold
    # NOTE: nfold0 set here is only a default -- it is overwritten from
    # xdatacls.nfold0 after prepare_repetition_folding() inside the ipar loop.
    if xdatacls.itestmode == 0:
        nfold0 = 1  ## active learning
    else:
        nfold0 = nfold  ## n-fold cross validation

    nparacc = 2  ## rmse, time
    npar = 1  ## number of parameter settings scanned by the outer loop
    xsummary = np.zeros((npar, nparacc))

    ifile = 0  # index of the label file to load
    pselect = 0.05  # selection probability passed to the matrix loader
    itrates = 1  # itrain flag for rmatrix.load
    print('ifile:', ifile)
    print('itrates:', itrates)
    print('pselect:', pselect)
    lfiles = []  # file list consumed by confusion_latex when ieval_type==10

    for ipar in range(npar):

        # load the random relation matrix and hand it to the MVM object
        rmatrix = mvm_random_matrix.cls_label_files()
        (xdata, nrow2, ncol2) = rmatrix.load(ifile, pselect, itrain=itrates)
        xdatacls.load_data(xdata,xdatacls.categorymax, \
                           int(nrow2),int(ncol2),None)
        scombine = ''
        # output file name encodes the test mode and bootstrap strategy
        # NOTE(review): fname stays unbound if itestmode==0 and ibootstrap
        # is outside 0..3 -- presumably that never happens; verify upstream.
        if xdatacls.itestmode == 0:
            if xdatacls.ibootstrap == 0:
                fname = 'xresultte_rand' + scombine + '.csv'
            elif xdatacls.ibootstrap == 1:
                fname = 'xresultte_active' + scombine + '.csv'
            elif xdatacls.ibootstrap == 2:
                fname = 'xresultte_greedy' + scombine + '.csv'
            elif xdatacls.ibootstrap == 3:
                fname = 'xresultte_act_rand' + scombine + '.csv'
        else:
            fname = 'xresultte_ncross' + scombine + '.csv'

        # initial output-kernel range; ystep derived from the range
        xdatacls.YKernel.ymax = 1
        # it will be recomputed in mvm_ranges
        xdatacls.YKernel.ymin = -1
        xdatacls.YKernel.yrange = 200  # it will be recomputed in classcol_ranges
        xdatacls.YKernel.ystep=(xdatacls.YKernel.ymax-xdatacls.YKernel.ymin) \
                                /xdatacls.YKernel.yrange
        ##  set_printoptions(precision=4)
        nparam = 4  # C,D,par1,par2
        nreport = 4  ## accuracy, precision, recall, f1

        # the repetition/fold counts actually used below come from here
        xdatacls.prepare_repetition_folding(init_train_size=100)
        nrepeat0 = xdatacls.nrepeat0
        nfold0 = xdatacls.nfold0

        # report collector sized (nrepeat0, nfold0, nreport)
        creport = mmr_report_cls.cls_mmr_report()
        creport.create_xaprf(nrepeat=nrepeat0, nfold=nfold0, nreport=nreport)
        xbest_param = np.zeros((nrepeat0, nfold0, nparam))

        # ############################################################

        # per-dimension confusion tensors, only filled when ieval_type==10
        nval = max(xdatacls.YKernel.valrange) + 1
        xconfusion3 = np.zeros(
            (nrepeat0, nfold0, xdatacls.YKernel.ndim, nval, nval))

        xsolvertime = 0.0  # accumulated solver time, averaged into xsummary
        ireport = 0  # number of (repeat, fold) cells completed so far
        for irepeat in range(nrepeat0):

            xdatacls.prepare_repetition_training()

            for ifold in range(nfold0):

                xdatacls.prepare_fold_training(ifold)

                # validation to choose the best parameters
                print('Validation')
                xdatacls.set_validation()
                cvalidation = mvm_validation_cls.cls_mvm_validation()
                cvalidation.validation_rkernel = xdatacls.XKernel[0].title
                best_param = cvalidation.mvm_validation(xdatacls)

                print('Parameters:',best_param.c,best_param.d, \
                      best_param.par1,best_param.par2)

                print('Best parameters found by validation')
                xbest_param[irepeat, ifold, 0] = best_param.c
                xbest_param[irepeat, ifold, 1] = best_param.d
                xbest_param[irepeat, ifold, 2] = best_param.par1
                xbest_param[irepeat, ifold, 3] = best_param.par2

                # training with the best parameters
                print('training')

                time0 = time.time()
                cOptDual = xdatacls.mvm_train()  # return value not used below
                xsolvertime += xdatacls.solvertime
                print('Training time:', time.time() - time0)
                sys.stdout.flush()

                # check the train accuracy
                print('test on training')

                # check the test accuracy
                print('test on test')
                time0 = time.time()
                cPredict = xdatacls.mvm_test()
                print('Test time:', time.time() - time0)
                sys.stdout.flush()

                # counts the proportion the ones predicted correctly
                # ####################################
                time0 = time.time()
                (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                                  xdatacls.nrow,xdatacls,cPredict.Zrow)
                print('Evaluation time:', time.time() - time0)
                (qtest, qpred) = makearray(xdatacls, cPredict.Zrow)  # unused here

                # record the evaluation; only ieval_type==10 additionally
                # keeps the per-dimension confusion tensor
                if xdatacls.ieval_type == 0:
                    creport.set_xaprf(irepeat, ifold, cEval)
                elif xdatacls.ieval_type == 10:
                    creport.set_xaprf(irepeat, ifold, cEval)
                    xconfusion3[irepeat, ifold] = cEval.xconfusion3
                else:
                    creport.set_xaprf(irepeat, ifold, cEval)

                # map candidate indexes (relative to the test subset) back
                # to global sample indexes for the next active-learning step
                xdatacls.icandidate_w = xdatacls.itest[icandidate_w]
                xdatacls.icandidate_b = xdatacls.itest[icandidate_b]
                ireport += 1

                ## print(cEval.xconfusion)
                if xdatacls.ieval_type == 0:
                    # raw confusion counts
                    for xconfrow in cEval.xconfusion:
                        for ditem in xconfrow:
                            print('%7.0f' % ditem, end='')
                        print()
                    print()
                elif xdatacls.ieval_type == 10:
                    # one confusion table per output dimension, as percentages
                    for xtable in cEval.xconfusion3:
                        xsum = np.sum(xtable)
                        if xsum == 0:
                            xsum = 1  # avoid division by zero on empty table
                        xtable = 100 * xtable / xsum
                        for xconfrow in xtable:
                            for ditem in xconfrow:
                                print('%9.4f' % ditem, end='')
                            print()
                        print()
                    print()

        # ####################################
                print('*** ipar, repeatation, fold ***')
                print(ipar, irepeat, ifold)

                if xdatacls.itestmode == 1:  ## n-fold crossvalidation

                    creport.report_prf(xmask=[irepeat,ifold], \
                                       stitle='Result in one fold and one repetation', \
                                       ssubtitle='Accuracy on test')

            creport.report_prf(xmask=[irepeat,None], \
                               stitle='Result in one repetation', \
                               ssubtitle='Mean and std of the accuracy on test')

            sys.stdout.flush()

            # dump partial results after every repetition
            if xdatacls.itestmode == 0:  ## active learning: row per step
                np.savetxt(fname,creport.xresulttes[:ireport,0,:],delimiter=',', \
                           fmt='%6.4f')
            else:
                if xdatacls.ieval_type == 0:
                    np.savetxt(fname,np.squeeze(creport.xaprf),delimiter=',', \
                               fmt='%6.4f')
                else:
                    np.savetxt(fname,
                               creport.xaprf[:, :, 0],
                               delimiter=',',
                               fmt='%6.4f')

        (xmean,xstd)=creport.report_prf(xmask=[None,None], \
                         stitle='***** Overall result ****', \
                         ssubtitle='Mean and std of the accuracy on test + error')

        # per-parameter summary: mean accuracy and mean solver time
        xsummary[ipar, 0] = xmean[0]
        xsummary[ipar, 1] = xsolvertime / (nrepeat0 * nfold0)

        if xdatacls.ieval_type == 10:
            confusion_latex(xconfusion3, lfiles)

        print('Average best parameters')
        xlabels = ('c', 'd', 'par1', 'par2')
        for i in range(nparam):
            print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                    '(',np.std(xbest_param[:,:,i]),')')

    print('$$$$$$$$$ Summary results:')
    (m, n) = xsummary.shape
    for i in range(m):
        for j in range(n):
            print('%10.4f' % xsummary[i, j], end='')
        print()

    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print('Bye')

    return
Exemple #3
0
def roar_main(workmode):
  """Run the ROAR variant of the MVM pipeline.

  Prepares the data via roar_prepare, then either performs active learning
  (itestmode==0: one sample added to the training set per repetition,
  chosen by the ibootstrap strategy) or n-fold cross validation.  Each
  repetition/fold validates parameters, trains, tests, evaluates, prints
  reports and saves accumulated test results to a mode-specific CSV file.

  workmode: accepted from the caller but not referenced in this body.
  """

  params=mmr_setparams.cls_params()
  params.setvalidation()
  params.setsolver()
  params.setgeneral()
  params.setoutput()
  params.setinput()

## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  xdatacls=mvm_mvm_cls.cls_mvm()

  roar_prepare.roar_prepare(xdatacls)

  nfold=xdatacls.nfold
  if xdatacls.itestmode in (0,3):
    nfold0=1        ## active learning
  else:
    nfold0=nfold    ## n-fold cross validation
  nrepeat=xdatacls.nrepeat

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  scombine=''

  # output file name encodes the test mode and bootstrap strategy
  # NOTE(review): fname stays unbound if itestmode==0 and ibootstrap is
  # outside 0..3 -- presumably that never happens; verify upstream.
  if xdatacls.itestmode==0:
    if xdatacls.ibootstrap==0:
      fname='xresultte_rand'+scombine+'.csv'
    elif xdatacls.ibootstrap==1:  
      fname='xresultte_active'+scombine+'.csv'
    elif xdatacls.ibootstrap==2:  
      fname='xresultte_greedy'+scombine+'.csv'
    elif xdatacls.ibootstrap==3:  
      fname='xresultte_act_rand'+scombine+'.csv'
  else:
    fname='xresultte_ncross'+scombine+'.csv'

  ## xdatacls.YKernel.ymax=ctables.ncategory
  # it will be recomputed in mvm_ranges
  xdatacls.YKernel.ymin=0
  xdatacls.YKernel.yrange=100 # it will be recomputed in classcol_ranges
  xdatacls.YKernel.ystep=1  

  # load the databases
  # data file
  ndata=xdatacls.ndata
  
##  set_printoptions(precision=4)
  npar=1   ## number of parameter selected for random subsample
  
  nparam=4    # C,D,par1,par2
  nreport=4   ## accuracy, precision, recall, f1

  # NOTE: this nrepeat0 is provisional -- both branches of the next
  # itestmode test below assign nrepeat0 again before it is used.
  if xdatacls.itestmode==0:
    nrepeat0=ndata-1   ## active learning
  else:
    nrepeat0=nrepeat

  if xdatacls.itestmode==0:
    ## initialize the active learning seeds
    ## pzero=0.001
    ## xselector=1*(np.random.rand(ndata)<pzero)

    # deterministic seed selection: step through the data with a stride
    # coprime to ndata so the nzero seeds are spread over the set
    nzero=100  ## !!!!!!!! initial training size
    xselector=np.zeros(ndata)
    nprime=4999
    ip=0
    for i in range(nzero):
      ip+=nprime
      if ip>ndata:
        ip=ip%ndata
      xselector[ip]=1  

    ndatainit=int(np.sum(xselector))
    mtest=ndata-ndatainit
    xdatacls.itest=np.where(xselector==0)[0]
    icandidate_w=-1     # -1 marks "no candidate yet" for the first round
    icandidate_b=-1
    ## nrepeat0=ndata-ndatainit-10
    nrepeat0=min(100000,ndata-ndatainit-1000)  ## !!!!!! test size
    ## nrepeat0=1
  else:   ## n-fold cross validation
    nrepeat0=nrepeat
    
  xresulttr=np.zeros((nrepeat0,nfold0))   # written only in commented-out code
  xresultte=np.zeros((nrepeat0,nfold0,nreport))
  xbest_param=np.zeros((nrepeat0,nfold0,nparam))

  # ############################################################

  # number iterations in the optimization
  params.solver.niter=100
  print('niter:',params.solver.niter)

  for ipar in range(npar):

    # per-dimension confusion tensors, only filled when ieval_type==10
    nval=len(xdatacls.YKernel.valrange)
    xconfusion3=np.zeros((nrepeat0,nfold0,xdatacls.YKernel.ndim,nval,nval))

    ireport=0
    ## for irepeat in range(int(float(ndata)/3)):
    for irepeat in range(nrepeat0):

      # active learning: move one candidate from test into training,
      # chosen according to the bootstrap strategy
      if xdatacls.itestmode==0:
        if xdatacls.ibootstrap==0:        ## random sample
          if icandidate_w>=0:
            icandidate_w=np.random.randint(mtest,size=1)
            icandidate_w=xdatacls.itest[icandidate_w]
            xselector[icandidate_w]=1
            ## xselector[icandidate_b]=0     ## delete the best 
        elif xdatacls.ibootstrap==1:  ## worst confidence
          if icandidate_w>=0:
            xselector[icandidate_w]=1
            ## xselector[icandidate_b]=0     ## delete the best 
        elif xdatacls.ibootstrap==2:  ## best confidence
          if icandidate_b>=0:
            xselector[icandidate_b]=1
        elif xdatacls.ibootstrap==3:  ## worst+random
          if icandidate_w>=0:
            pselect=np.random.rand()
            if pselect<0.5:
              icandidate_w=np.random.randint(mtest)
              icandidate_w=xdatacls.itest[icandidate_w]
            xselector[icandidate_w]=1
            ## xselector[icandidate_b]=0     ## delete the best
      elif xdatacls.itestmode==1:   ## n-fold cross-validation
        ## !!! Emre !!!
        # random fold labels in 0..nfold0-1; the subtraction folds the
        # (rare) value nfold0 back into the last fold
        xselector=np.floor(np.random.random(ndata)*nfold0)
        xselector=xselector-(xselector==nfold0)

      ## if xdatacls.itestmode==1:  ## n-fold crossvalidation
      ##   xselector=np.random.randint(nfold0, size=ndata)
      ## elif xdatacls.itestmode==2:  ## random subset
      ##   xselector=1*(np.random.rand(ndata)<float(plist[ipar])/100)
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
## for test only
      elif xdatacls.itestmode==-1:
        # NOTE(review): xselector is only pre-allocated in the
        # itestmode==0 branch above -- confirm this path is reachable
        # with xselector defined.
        for i in range(ndata):
          xselector[i]=i%nfold0
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!          
##        xselector_row=np.floor(nfold0*np.random.rand(nrow))

      for ifold in range(nfold0):

        xdatacls.split_train_test(xselector,ifold)
        mtest=len(xdatacls.itest)
        if mtest<=0:
          print('!!!!!!!')
          break

        print('mtest:',mtest,'mtrain:',len(xdatacls.itrain))

        xdatacls.mvm_datasplit()        

    # sparse matrices of ranks-row_avarage-col_average+total_avarege  
        xdatacls.xranges_rel=mvm_ranges(xdatacls.xdata_tra,xdatacls.nrow, \
                                     params)
        xdatacls.xranges_rel_test=mvm_ranges(xdatacls.xdata_tes, \
                                          xdatacls.nrow,params)
        ## mvm_loadmatrix(xdatacls,isubset_tra,params)
        # category==0: regression-style preprocessing; 1 and 2 both use
        # the largest-category heuristic
        if xdatacls.category==0:
          mvm_glm(xdatacls,params)
          mvm_ygrid(xdatacls,params)
        elif xdatacls.category==1:
          mvm_largest_category(xdatacls)
        elif xdatacls.category==2:
          mvm_largest_category(xdatacls)

    # validation to choose the best parameters
        print('Validation')
        xdatacls.set_validation()
        params.validation.rkernel=xdatacls.XKernel[0].title
        if params.validation.rkernel in xdatacls.dkernels:
          kernbest=xdatacls.dkernels[params.validation.rkernel].kernel_params
        else:
          kernbest=xdatacls.XKernel[0].kernel_params
        
        # ivalid==1 runs a parameter search; otherwise reuse the current
        # penalty/kernel parameters unchanged
        if params.validation.ivalid==1:
          best_param=mvm_validation(xdatacls,params)
        else:
          best_param=cls_empty_class()
          best_param.c=xdatacls.penalty.c
          best_param.d=xdatacls.penalty.d
          best_param.par1=kernbest.ipar1
          best_param.par2=kernbest.ipar2

        xdatacls.penalty.c=best_param.c
        xdatacls.penalty.d=best_param.d
        kernbest.ipar1=best_param.par1
        kernbest.ipar2=best_param.par2

        print('Parameters:',xdatacls.penalty.c,xdatacls.penalty.d, \
              kernbest.ipar1,kernbest.ipar2)
        
        print('Best parameters found by validation')
        xbest_param[irepeat,ifold,0]=best_param.c
        xbest_param[irepeat,ifold,1]=best_param.d
        xbest_param[irepeat,ifold,2]=best_param.par1
        xbest_param[irepeat,ifold,3]=best_param.par2

    # training with the best parameters
        print('training')

        time0=time.time()
        cOptDual= xdatacls.mvm_train(params)
        print('Training time:',time.time()-time0)
        
    # cls transfers the dual variables to the test procedure
    # compute test 

    # check the train accuracy
        print('test on training')

    # $$$ # counts the proportion the ones predicted correctly    
    # $$$ # ######################################
    # $$$     deval=col_eval(xdatacls.ieval_type,nrow,isubset_tra, \
    # $$$                      xranges_tra,Zrow)
    # $$$     xresulttr(irepeat,ifold)=deval
    # ######################################     
    # check the test accuracy
        print('test on test')
        time0=time.time()
        cPredict=xdatacls.mvm_test(cOptDual.alpha,params)
        print('Test time:',time.time()-time0)

    # counts the proportion the ones predicted correctly
    # ####################################
        time0=time.time()
        (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                          xdatacls.nrow,xdatacls,cPredict.Zrow)
        print('Evaluation time:',time.time()-time0)

        # record the evaluation per ieval_type:
        #  0  -> accuracy/precision/recall/f1; 10 -> accuracy + confusion
        #  tensor; anything else -> single deval score
        if xdatacls.ieval_type==0:
          xresultte[irepeat,ifold,0]=cEval.accuracy
          ## prediction of effective categories
          ## part_accuracy=float(np.sum(np.diag(cEval.xconfusion)[1:]))/ \
          ##           np.sum(cEval.xconfusion[1:,1:])
          ## xresultte[irepeat,ifold,1]=part_accuracy
          xresultte[irepeat,ifold,1]=cEval.precision
          xresultte[irepeat,ifold,2]=cEval.recall
          xresultte[irepeat,ifold,3]=cEval.f1
        elif xdatacls.ieval_type==10:
          xresultte[irepeat,ifold,0]=cEval.accuracy
          xconfusion3[irepeat,ifold]=cEval.xconfusion3
        else:
          xresultte[irepeat,ifold,0]=cEval.deval
        # map candidate indexes (relative to the test subset) back to
        # global sample indexes for the next active-learning round
        icandidate_w=xdatacls.itest[icandidate_w]
        icandidate_b=xdatacls.itest[icandidate_b]
        ireport+=1

        ## print(cEval.xconfusion)
        if xdatacls.ieval_type!=10:
          # raw confusion counts
          for xconfrow in cEval.xconfusion:
            for ditem in xconfrow:
              print('%7.0f'%ditem,end='')
            print()
          print()
        else:
          # one confusion table per output dimension, shown as percentages
          for xtable in cEval.xconfusion3:
            xsum=np.sum(xtable)
            if xsum==0:
              xsum=1    # guard against division by zero on an empty table
            xtable=100*xtable/xsum
            for xconfrow in xtable:
              for ditem in xconfrow:
                print('%8.4f'%ditem,end='')
              print()
            print()
          print()
        
    # ####################################    
        print('*** ipar, repeatation, fold ***') 
        print(ipar,irepeat,ifold)

        if xdatacls.itestmode==1: ## n-fold crossvalidation
          print('Result in one fold and one repeatation')
          ## print('Accuracy on train')
          ## print(xresulttr[irepeat,ifold])
          print('Accuracy on test')
          if xdatacls.ieval_type==0:
            print(xresultte[irepeat,ifold])
          else:
            print(xresultte[irepeat,ifold,0])

      print('Result in one repetation')
      print('Mean and std of the accuracy on test')
      # NOTE(review): both branches print the same expression -- they look
      # like placeholders for a per-ieval_type report; confirm intent.
      if xdatacls.ieval_type==0:
        print(np.mean(xresultte[irepeat,:,0]),
            np.std(xresultte[irepeat,:,0]))
      else:
        print(np.mean(xresultte[irepeat,:,0]),
            np.std(xresultte[irepeat,:,0]))
        
      sys.stdout.flush()
        
      # dump partial results after every repetition
      if xdatacls.itestmode==0: ## active learning: one row per finished step
        np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
      else:
        if xdatacls.ieval_type==0:
          np.savetxt(fname,xresultte[:ireport,:,:],delimiter=',',fmt='%6.4f')
        else:
          np.savetxt(fname,xresultte[:ireport,:,0],delimiter=',',fmt='%6.4f')

    print('***** Overall result ****')
    print('Mean and std of the accuracy on test + error')
    # NOTE(review): both branches identical here as well -- see above.
    if xdatacls.ieval_type==0:
      print(np.mean(xresultte[:,:,0]),
            np.std(xresultte[:,:,0]))
    else:
      print(np.mean(xresultte[:,:,0]),
            np.std(xresultte[:,:,0]))

#     if xdatacls.ieval_type==10:
#       confusion_latex(xconfusion3,lfiles)      
      
    print('Average best parameters')
    ##  sfield=dir(best_param)
    xlabels=('c','d','par1','par2')
    for i in range(nparam):
    ##    print(sfield[i])
      print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
              '(',np.std(xbest_param[:,:,i]),')')
  
  ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
  print('Bye')    
  
  return
Exemple #4
0
def roar_main(workmode):
    """Run the MVM relation-learning experiment on the ROAR data.

    Depending on ``xdatacls.itestmode`` the driver performs either
    active learning (itestmode in (0, 3): a single fold; per repetition
    one test item is moved into the training set according to the
    ``ibootstrap`` strategy) or n-fold cross validation (itestmode == 1).
    Per-fold, per-repetition and overall accuracy statistics are printed,
    and the running test results are dumped into a CSV file whose name
    encodes the sampling strategy.

    Parameters
    ----------
    workmode
        Unused here; kept so all ``*_main`` drivers share the same
        signature.

    Returns
    -------
    None
    """

    params = mmr_setparams.cls_params()
    params.setvalidation()
    params.setsolver()
    params.setgeneral()
    params.setoutput()
    params.setinput()

    ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    xdatacls = mvm_mvm_cls.cls_mvm()

    roar_prepare.roar_prepare(xdatacls)

    nfold = xdatacls.nfold
    if xdatacls.itestmode in (0, 3):
        nfold0 = 1  ## active learning
    else:
        nfold0 = nfold  ## n-fold cross validation
    nrepeat = xdatacls.nrepeat

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    scombine = ''

    # the output file name encodes the test mode / bootstrap strategy
    if xdatacls.itestmode == 0:
        if xdatacls.ibootstrap == 0:
            fname = 'xresultte_rand' + scombine + '.csv'
        elif xdatacls.ibootstrap == 1:
            fname = 'xresultte_active' + scombine + '.csv'
        elif xdatacls.ibootstrap == 2:
            fname = 'xresultte_greedy' + scombine + '.csv'
        elif xdatacls.ibootstrap == 3:
            fname = 'xresultte_act_rand' + scombine + '.csv'
    else:
        fname = 'xresultte_ncross' + scombine + '.csv'

    ## xdatacls.YKernel.ymax=ctables.ncategory
    # it will be recomputed in mvm_ranges
    xdatacls.YKernel.ymin = 0
    xdatacls.YKernel.yrange = 100  # it will be recomputed in classcol_ranges
    xdatacls.YKernel.ystep = 1

    # load the databases
    # data file
    ndata = xdatacls.ndata

    ##  set_printoptions(precision=4)
    npar = 1  ## number of parameter selected for random subsample

    nparam = 4  # C,D,par1,par2
    nreport = 4  ## accuracy, precision, recall, f1

    # (a redundant pre-assignment of nrepeat0 was removed here: both
    # branches of the if/else below set it unconditionally)
    if xdatacls.itestmode == 0:
        ## initialize the active learning seeds
        ## pzero=0.001
        ## xselector=1*(np.random.rand(ndata)<pzero)

        nzero = 100  ## !!!!!!!! initial training size
        xselector = np.zeros(ndata)
        nprime = 4999
        ip = 0
        # pick nzero seed items by stepping through the data with a
        # fixed stride so the seed indices are spread over the set
        for i in range(nzero):
            ip += nprime
            if ip >= ndata:  # ">=": ip == ndata would index out of bounds
                ip = ip % ndata
            xselector[ip] = 1

        ndatainit = int(np.sum(xselector))
        mtest = ndata - ndatainit
        xdatacls.itest = np.where(xselector == 0)[0]
        icandidate_w = -1
        icandidate_b = -1
        ## nrepeat0=ndata-ndatainit-10
        nrepeat0 = min(100000, ndata - ndatainit - 1000)  ## !!!!!! test size
        ## nrepeat0=1
    else:  ## n-fold cross validation
        nrepeat0 = nrepeat

    # result accumulators; xresulttr is only referenced by the
    # commented-out train-accuracy code further below
    xresulttr = np.zeros((nrepeat0, nfold0))
    xresultte = np.zeros((nrepeat0, nfold0, nreport))
    xbest_param = np.zeros((nrepeat0, nfold0, nparam))

    # ############################################################

    # number iterations in the optimization
    params.solver.niter = 100
    print('niter:', params.solver.niter)

    for ipar in range(npar):

        nval = len(xdatacls.YKernel.valrange)
        xconfusion3 = np.zeros(
            (nrepeat0, nfold0, xdatacls.YKernel.ndim, nval, nval))

        ireport = 0
        ## for irepeat in range(int(float(ndata)/3)):
        for irepeat in range(nrepeat0):

            # choose the next training item(s): active-learning strategy
            # (itestmode==0) or a fresh random fold assignment
            if xdatacls.itestmode == 0:
                if xdatacls.ibootstrap == 0:  ## random selection
                    if icandidate_w >= 0:
                        icandidate_w = np.random.randint(mtest, size=1)
                        icandidate_w = xdatacls.itest[icandidate_w]
                        xselector[icandidate_w] = 1
                        ## xselector[icandidate_b]=0     ## delete the best
                elif xdatacls.ibootstrap == 1:  ## worst confidence
                    if icandidate_w >= 0:
                        xselector[icandidate_w] = 1
                        ## xselector[icandidate_b]=0     ## delete the best
                elif xdatacls.ibootstrap == 2:  ## best confidence
                    if icandidate_b >= 0:
                        xselector[icandidate_b] = 1
                elif xdatacls.ibootstrap == 3:  ## worst+random
                    if icandidate_w >= 0:
                        pselect = np.random.rand()
                        if pselect < 0.5:
                            icandidate_w = np.random.randint(mtest)
                            icandidate_w = xdatacls.itest[icandidate_w]
                        xselector[icandidate_w] = 1
                        ## xselector[icandidate_b]=0     ## delete the best
            elif xdatacls.itestmode == 1:  ## n-fold cross-validation
                ## !!! Emre !!!
                xselector = np.floor(np.random.random(ndata) * nfold0)
                xselector = xselector - (xselector == nfold0)

            ## if xdatacls.itestmode==1:  ## n-fold crossvalidation
            ##   xselector=np.random.randint(nfold0, size=ndata)
            ## elif xdatacls.itestmode==2:  ## random subset
            ##   xselector=1*(np.random.rand(ndata)<float(plist[ipar])/100)
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
## for test only
            elif xdatacls.itestmode == -1:
                for i in range(ndata):
                    xselector[i] = i % nfold0
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
##        xselector_row=np.floor(nfold0*np.random.rand(nrow))

            for ifold in range(nfold0):

                xdatacls.split_train_test(xselector, ifold)
                mtest = len(xdatacls.itest)
                if mtest <= 0:
                    print('!!!!!!!')
                    break

                print('mtest:', mtest, 'mtrain:', len(xdatacls.itrain))

                xdatacls.mvm_datasplit()

                # sparse matrices of ranks-row_avarage-col_average+total_avarege
                xdatacls.xranges_rel=mvm_ranges(xdatacls.xdata_tra,xdatacls.nrow, \
                                             params)
                xdatacls.xranges_rel_test=mvm_ranges(xdatacls.xdata_tes, \
                                                  xdatacls.nrow,params)
                ## mvm_loadmatrix(xdatacls,isubset_tra,params)
                if xdatacls.category == 0:
                    mvm_glm(xdatacls, params)
                    mvm_ygrid(xdatacls, params)
                elif xdatacls.category == 1:
                    mvm_largest_category(xdatacls)
                elif xdatacls.category == 2:
                    mvm_largest_category(xdatacls)

        # validation to choose the best parameters
                print('Validation')
                xdatacls.set_validation()
                params.validation.rkernel = xdatacls.XKernel[0].title
                if params.validation.rkernel in xdatacls.dkernels:
                    kernbest = xdatacls.dkernels[
                        params.validation.rkernel].kernel_params
                else:
                    kernbest = xdatacls.XKernel[0].kernel_params

                if params.validation.ivalid == 1:
                    best_param = mvm_validation(xdatacls, params)
                else:
                    # no validation requested: reuse the current penalty
                    # and kernel parameters unchanged
                    best_param = cls_empty_class()
                    best_param.c = xdatacls.penalty.c
                    best_param.d = xdatacls.penalty.d
                    best_param.par1 = kernbest.ipar1
                    best_param.par2 = kernbest.ipar2

                xdatacls.penalty.c = best_param.c
                xdatacls.penalty.d = best_param.d
                kernbest.ipar1 = best_param.par1
                kernbest.ipar2 = best_param.par2

                print('Parameters:',xdatacls.penalty.c,xdatacls.penalty.d, \
                      kernbest.ipar1,kernbest.ipar2)

                print('Best parameters found by validation')
                xbest_param[irepeat, ifold, 0] = best_param.c
                xbest_param[irepeat, ifold, 1] = best_param.d
                xbest_param[irepeat, ifold, 2] = best_param.par1
                xbest_param[irepeat, ifold, 3] = best_param.par2

                # training with the best parameters
                print('training')

                time0 = time.time()
                cOptDual = xdatacls.mvm_train(params)
                print('Training time:', time.time() - time0)

                # cls transfers the dual variables to the test procedure
                # compute test

                # check the train accuracy
                print('test on training')

                # $$$ # counts the proportion the ones predicted correctly
                # $$$ # ######################################
                # $$$     deval=col_eval(xdatacls.ieval_type,nrow,isubset_tra, \
                # $$$                      xranges_tra,Zrow)
                # $$$     xresulttr(irepeat,ifold)=deval
                # ######################################
                # check the test accuracy
                print('test on test')
                time0 = time.time()
                cPredict = xdatacls.mvm_test(cOptDual.alpha, params)
                print('Test time:', time.time() - time0)

                # counts the proportion the ones predicted correctly
                # ####################################
                time0 = time.time()
                (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                                  xdatacls.nrow,xdatacls,cPredict.Zrow)
                print('Evaluation time:', time.time() - time0)

                if xdatacls.ieval_type == 0:
                    xresultte[irepeat, ifold, 0] = cEval.accuracy
                    ## prediction of effective categories
                    ## part_accuracy=float(np.sum(np.diag(cEval.xconfusion)[1:]))/ \
                    ##           np.sum(cEval.xconfusion[1:,1:])
                    ## xresultte[irepeat,ifold,1]=part_accuracy
                    xresultte[irepeat, ifold, 1] = cEval.precision
                    xresultte[irepeat, ifold, 2] = cEval.recall
                    xresultte[irepeat, ifold, 3] = cEval.f1
                elif xdatacls.ieval_type == 10:
                    xresultte[irepeat, ifold, 0] = cEval.accuracy
                    xconfusion3[irepeat, ifold] = cEval.xconfusion3
                else:
                    xresultte[irepeat, ifold, 0] = cEval.deval
                # map the fold-local candidate indices back to global ones
                icandidate_w = xdatacls.itest[icandidate_w]
                icandidate_b = xdatacls.itest[icandidate_b]
                ireport += 1

                ## print(cEval.xconfusion)
                if xdatacls.ieval_type != 10:
                    for xconfrow in cEval.xconfusion:
                        for ditem in xconfrow:
                            print('%7.0f' % ditem, end='')
                        print()
                    print()
                else:
                    # print each confusion table normalized to percentages
                    for xtable in cEval.xconfusion3:
                        xsum = np.sum(xtable)
                        if xsum == 0:
                            xsum = 1
                        xtable = 100 * xtable / xsum
                        for xconfrow in xtable:
                            for ditem in xconfrow:
                                print('%8.4f' % ditem, end='')
                            print()
                        print()
                    print()

        # ####################################
                print('*** ipar, repeatation, fold ***')
                print(ipar, irepeat, ifold)

                if xdatacls.itestmode == 1:  ## n-fold crossvalidation
                    print('Result in one fold and one repeatation')
                    ## print('Accuracy on train')
                    ## print(xresulttr[irepeat,ifold])
                    print('Accuracy on test')
                    if xdatacls.ieval_type == 0:
                        print(xresultte[irepeat, ifold])
                    else:
                        print(xresultte[irepeat, ifold, 0])

            print('Result in one repetation')
            print('Mean and std of the accuracy on test')
            # both ieval_type branches printed the identical expression;
            # collapsed into a single statement
            print(np.mean(xresultte[irepeat, :, 0]),
                  np.std(xresultte[irepeat, :, 0]))

            sys.stdout.flush()

            if xdatacls.itestmode == 0:  ## active learning
                np.savetxt(fname,
                           xresultte[:ireport, 0, :],
                           delimiter=',',
                           fmt='%6.4f')
            else:
                if xdatacls.ieval_type == 0:
                    np.savetxt(fname,
                               xresultte[:ireport, :, :],
                               delimiter=',',
                               fmt='%6.4f')
                else:
                    np.savetxt(fname,
                               xresultte[:ireport, :, 0],
                               delimiter=',',
                               fmt='%6.4f')

        print('***** Overall result ****')
        print('Mean and std of the accuracy on test + error')
        # both ieval_type branches printed the identical expression;
        # collapsed into a single statement
        print(np.mean(xresultte[:, :, 0]), np.std(xresultte[:, :, 0]))

#     if xdatacls.ieval_type==10:
#       confusion_latex(xconfusion3,lfiles)

        print('Average best parameters')
        ##  sfield=dir(best_param)
        xlabels = ('c', 'd', 'par1', 'par2')
        for i in range(nparam):
            ##    print(sfield[i])
            print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                    '(',np.std(xbest_param[:,:,i]),')')

    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print('Bye')

    return
Exemple #5
0
def test_mvm_main(workmode):
    """Driver for an MVM experiment on object-object-action affordance data.

    For each of the ``npar`` parameter settings the relation tables are
    loaded via ``webrel_load_data``, repetitions/folds are prepared by the
    data class itself, and per fold the model is validated, trained and
    tested; accuracy statistics are accumulated in ``creport`` and a
    summary table is printed at the end.

    Parameters
    ----------
    workmode
        Unused in this function; kept for interface compatibility with
        the other ``*_main`` drivers in this collection.

    Returns
    -------
    None
    """

    # NOTE(review): params is constructed but not read below — confirm the
    # constructor's side effects are the only reason it is kept
    params = mmr_setparams.cls_params()

    xdatacls = mvm_mvm_cls.cls_mvm()
    nfold = xdatacls.nfold
    if xdatacls.itestmode == 0:
        nfold0 = 1  ## active learning
    else:
        nfold0 = nfold  ## n-fold cross validation

    nparacc = 2  ## rmse, time
    npar = 1
    # per-parameter summary: column 0 = accuracy, column 1 = mean solver time
    xsummary = np.zeros((npar, nparacc))

    lfilenames = ["affordances_instrument_for", "affordances_patient"]
    ifile = 1  ## file index in list above
    lfiles = [0, 1]
    lfeatures = ["PointMutualInformation", "absolute frequency"]
    ifeature = 0
    # NOTE(review): iloadall is set here but never referenced again in this
    # function — confirm whether it was meant to drive the data loading
    if xdatacls.itestmode == 3:
        iloadall = 1
    else:
        iloadall = 0

    print("lfiles:", lfilenames)
    print("ifeature:", lfeatures[ifeature])

    for ipar in range(npar):

        ## possible values
        Y0 = np.array([-1, 0, 1])
        ctables = webrel_load_data.cls_label_files()
        print(ctables.listcsv[ifile])
        # load both relation files; fixed train/test splits come with the data
        (xdata, nrow2, ncol2, ifixtrain, ifixtest) = ctables.load_objobj_act(lfiles, ifeature)
        xdatacls.load_data(xdata, xdatacls.categorymax, int(nrow2), int(ncol2), Y0)
        xdatacls.ifixtrain = ifixtrain
        xdatacls.ifixtest = ifixtest

        # the output file name encodes the test mode / bootstrap strategy
        scombine = ""
        if xdatacls.itestmode == 0:
            if xdatacls.ibootstrap == 0:
                fname = "xresultte_rand" + scombine + ".csv"
            elif xdatacls.ibootstrap == 1:
                fname = "xresultte_active" + scombine + ".csv"
            elif xdatacls.ibootstrap == 2:
                fname = "xresultte_greedy" + scombine + ".csv"
            elif xdatacls.ibootstrap == 3:
                fname = "xresultte_act_rand" + scombine + ".csv"
        else:
            fname = "xresultte_ncross" + scombine + ".csv"

        xdatacls.YKernel.ymax = 10
        # it will be recomputed in mvm_ranges
        xdatacls.YKernel.ymin = -10
        xdatacls.YKernel.yrange = 200  # it will be recomputed in classcol_ranges
        xdatacls.YKernel.ystep = (xdatacls.YKernel.ymax - xdatacls.YKernel.ymin) / xdatacls.YKernel.yrange
        ##  set_printoptions(precision=4)
        nparam = 4  # C,D,par1,par2
        nreport = 4  ## accuracy, precision, recall, f1

        xdatacls.prepare_repetition_folding(init_train_size=100)
        nrepeat0 = xdatacls.nrepeat0
        nfold0 = xdatacls.nfold0
        # itestmode 3: fixed train/test split, so only a single "fold"
        if xdatacls.itestmode == 3:
            nfold0 = 1

        creport = mmr_report_cls.cls_mmr_report()
        creport.create_xaprf(nrepeat=nrepeat0, nfold=nfold0, nreport=nreport)
        xbest_param = np.zeros((nrepeat0, nfold0, nparam))

        # ############################################################

        # 3-way confusion tensor, one (nval x nval) table per output dim
        nval = max(xdatacls.YKernel.valrange) + 1
        xconfusion3 = np.zeros((nrepeat0, nfold0, xdatacls.YKernel.ndim, nval, nval))

        xsolvertime = 0.0
        ireport = 0
        for irepeat in range(nrepeat0):

            # NOTE(review): this overrides the nfold0 chosen above on the
            # data object for every repetition — confirm intended
            xdatacls.nfold0 = xdatacls.nfold
            xdatacls.prepare_repetition_training()

            for ifold in range(nfold0):

                xdatacls.prepare_fold_training(ifold)

                # validation to choose the best parameters
                print("Validation")
                xdatacls.set_validation()
                cvalidation = mvm_validation_cls.cls_mvm_validation()
                cvalidation.validation_rkernel = xdatacls.XKernel[0].title
                best_param = cvalidation.mvm_validation(xdatacls)

                print("Parameters:", best_param.c, best_param.d, best_param.par1, best_param.par2)

                print("Best parameters found by validation")
                xbest_param[irepeat, ifold, 0] = best_param.c
                xbest_param[irepeat, ifold, 1] = best_param.d
                xbest_param[irepeat, ifold, 2] = best_param.par1
                xbest_param[irepeat, ifold, 3] = best_param.par2

                # training with the best parameters
                print("training")

                time0 = time.time()
                cOptDual = xdatacls.mvm_train()
                # accumulate pure solver time for the summary table
                xsolvertime += xdatacls.solvertime
                print("Training time:", time.time() - time0)
                sys.stdout.flush()

                # check the train accuracy
                print("test on training")

                # check the test accuracy
                print("test on test")
                time0 = time.time()

                # optionally replace the test set with the full test table
                if xdatacls.ifulltest == 1:
                    xdatacls.xdata_tes = ctables.full_test()
                    xdatacls.xranges_rel_test = mvm_prepare.mvm_ranges(xdatacls.xdata_tes, xdatacls.nrow)

                cPredict = xdatacls.mvm_test()
                print("Test time:", time.time() - time0)
                sys.stdout.flush()

                filename = "predicted_missing.csv"
                ctables.export_prediction(filename, xdatacls, cPredict.Zrow)

                # counts the proportion the ones predicted correctly
                # ####################################
                time0 = time.time()
                # evaluation only makes sense when the test labels are known
                if xdatacls.knowntest == 1:
                    (cEval, icandidate_w, icandidate_b) = mvm_eval(
                        xdatacls.ieval_type, xdatacls.nrow, xdatacls, cPredict.Zrow
                    )
                    print("Evaluation time:", time.time() - time0)
                    (qtest, qpred, qpred0) = makearray(xdatacls, cPredict.Zrow)

                    if xdatacls.ieval_type in (0, 11):
                        creport.set_xaprf(irepeat, ifold, cEval)
                    elif xdatacls.ieval_type == 10:
                        creport.set_xaprf(irepeat, ifold, cEval)
                        xconfusion3[irepeat, ifold] = cEval.xconfusion3
                    else:
                        creport.set_xaprf(irepeat, ifold, cEval)

                    ## xdatacls.icandidate_w=xdatacls.itest[icandidate_w]
                    ## xdatacls.icandidate_b=xdatacls.itest[icandidate_b]
                    ireport += 1

                    ## print(cEval.xconfusion)
                    if xdatacls.ieval_type in (0, 11):
                        for xconfrow in cEval.xconfusion:
                            for ditem in xconfrow:
                                print("%7.0f" % ditem, end="")
                            print()
                        print()
                    elif xdatacls.ieval_type == 10:
                        # print each confusion table normalized to percentages
                        for xtable in cEval.xconfusion3:
                            xsum = np.sum(xtable)
                            if xsum == 0:
                                xsum = 1
                            xtable = 100 * xtable / xsum
                            for xconfrow in xtable:
                                for ditem in xconfrow:
                                    print("%9.4f" % ditem, end="")
                                print()
                            print()
                        print()

                    # ####################################
                    print("*** ipar, repeatation, fold ***")
                    print(ipar, irepeat, ifold)

                    if xdatacls.itestmode == 1:  ## n-fold crossvalidation

                        creport.report_prf(
                            xmask=[irepeat, ifold],
                            stitle="Result in one fold and one repetation",
                            ssubtitle="Accuracy on test",
                        )

            if xdatacls.knowntest == 1:
                creport.report_prf(
                    xmask=[irepeat, None],
                    stitle="Result in one repetation",
                    ssubtitle="Mean and std of the accuracy on test",
                )

            sys.stdout.flush()

        if xdatacls.knowntest == 1:
            (xmean, xstd) = creport.report_prf(
                xmask=[None, None],
                stitle="***** Overall result ****",
                ssubtitle="Mean and std of the accuracy on test + error",
            )

            xsummary[ipar, 0] = xmean[0]
            xsummary[ipar, 1] = xsolvertime / (nrepeat0 * nfold0)

        # NOTE(review): the export/makearray calls in this branch are
        # commented out, so only filename is (re)assigned here
        if xdatacls.itestmode == 3:
            filename = "predicted_missing.csv"
            ## ctables.export_prediction(filename,xdatacls,cPredict.Zrow)

            ## (qtest,qpred,qpred0)=makearray(xdatacls,cPredict.Zrow)

        print("Average best parameters")
        xlabels = ("c", "d", "par1", "par2")
        for i in range(nparam):
            print(xlabels[i], ": ", np.mean(xbest_param[:, :, i]), "(", np.std(xbest_param[:, :, i]), ")")

    if xdatacls.knowntest == 1:
        print("$$$$$$$$$ Summary results:")
        (m, n) = xsummary.shape
        for i in range(m):
            for j in range(n):
                print("%10.4f" % xsummary[i, j], end="")
            print()

    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print("Bye")

    return
Exemple #6
0
def test_mvm_main(workmode):
  """Driver for an MVM experiment on the kingsc relation tables.

  Either a single file is loaded for cross validation (iloadall==0) or
  two files are loaded, the first as fixed training and the second as
  fixed test set (iloadall==1).  Per fold the model is validated,
  trained and tested; accuracy statistics are accumulated in
  ``creport`` and a summary table is printed at the end.  In the
  two-file mode the predictions are also exported to a CSV file.

  Parameters
  ----------
  workmode
      Unused in this function; kept for interface compatibility with
      the other ``*_main`` drivers in this collection.

  Returns
  -------
  None
  """

  # NOTE(review): params is constructed but not read below — confirm the
  # constructor's side effects are the only reason it is kept
  params=mmr_setparams.cls_params()

  xdatacls=mvm_mvm_cls.cls_mvm()
  nfold=xdatacls.nfold
  if xdatacls.itestmode==0:
    nfold0=1        ## active learning
  else:
    nfold0=nfold    ## n-fold cross validation

  nparacc=2   ## rmse, time
  npar=1
  # per-parameter summary: column 0 = accuracy, column 1 = mean solver time
  xsummary=np.zeros((npar,nparacc))
  
  ## ['full','full_20','full_40','full_60', \
  ##  'known','known_20','known_40','known_60']
  ifile1=0   ## file index in list known
  ifile2=0   ## file index in list full
  iknown1=1  ## known 
  iknown2=0  ## full
  iloadall=1  ## =0 one file for crossvalidation =1 two files: training + test

  print('iknown1:',iknown1,'iknown2:',iknown2)
  print('ifile1:',ifile1,'ifile2:',ifile2)
  
  for ipar in range(npar):

    ## possible values
    Y0=np.array([0,1])
    ctables=kingsc_load_data.cls_label_files()  ## data loading object
    print(ctables.listknown[ifile1])
    print(ctables.listfull[ifile2])
    if iloadall==0:   ## only one file is loaded for cross validation
      (xdata,nrow2,ncol2)=ctables.load_onefile(iknown1,ifile1) 
      xdatacls.load_data(xdata,xdatacls.categorymax, \
                       int(nrow2),int(ncol2),Y0)
    else: ## the first file gives trining the second serves as test 
      (xdata,nrow2,ncol2,ifixtrain,ifixtest)=ctables.load_twofiles( \
                          iknown1,iknown2,ifile1,ifile2)
      xdatacls.load_data(xdata,xdatacls.categorymax, \
                       int(nrow2),int(ncol2),Y0)
      xdatacls.ifixtrain=ifixtrain
      xdatacls.ifixtest=ifixtest

    # the output file name encodes the test mode / bootstrap strategy
    scombine=''
    if xdatacls.itestmode==0:
      if xdatacls.ibootstrap==0:
        fname='xresultte_rand'+scombine+'.csv'
      elif xdatacls.ibootstrap==1:  
        fname='xresultte_active'+scombine+'.csv'
      elif xdatacls.ibootstrap==2:  
        fname='xresultte_greedy'+scombine+'.csv'
      elif xdatacls.ibootstrap==3:  
        fname='xresultte_act_rand'+scombine+'.csv'
    else:
      fname='xresultte_ncross'+scombine+'.csv'

    xdatacls.YKernel.ymax=1
    # it will be recomputed in mvm_ranges
    xdatacls.YKernel.ymin=0
    xdatacls.YKernel.yrange=100 # it will be recomputed in classcol_ranges
    xdatacls.YKernel.ystep=(xdatacls.YKernel.ymax-xdatacls.YKernel.ymin) \
                            /xdatacls.YKernel.yrange
    ##  set_printoptions(precision=4)
    nparam=4    # C,D,par1,par2
    nreport=4   ## accuracy, precision, recall, f1

    xdatacls.prepare_repetition_folding(init_train_size=100)
    nrepeat0=xdatacls.nrepeat0
    nfold0=xdatacls.nfold0

    creport=mmr_report_cls.cls_mmr_report()
    # NOTE(review): the report is sized with nfold while the fold loop and
    # xbest_param use nfold0 (the sibling driver uses nfold0 here) —
    # confirm this mismatch is intentional
    creport.create_xaprf(nrepeat=nrepeat0,nfold=nfold,nreport=nreport)
    xbest_param=np.zeros((nrepeat0,nfold0,nparam))

    # ############################################################

    # 3-way confusion tensor, one (nval x nval) table per output dim
    nval=max(xdatacls.YKernel.valrange)+1
    xconfusion3=np.zeros((nrepeat0,nfold0,xdatacls.YKernel.ndim,nval,nval))

    xsolvertime=0.0
    ireport=0
    for irepeat in range(nrepeat0):

      # NOTE(review): this overrides the nfold0 chosen above on the data
      # object for every repetition — confirm intended
      xdatacls.nfold0=xdatacls.nfold
      xdatacls.prepare_repetition_training()
      ## nfold0=1

      for ifold in range(nfold0):

        xdatacls.prepare_fold_training(ifold)

    # validation to choose the best parameters
        print('Validation')
        xdatacls.set_validation()
        cvalidation=mvm_validation_cls.cls_mvm_validation()
        cvalidation.validation_rkernel=xdatacls.XKernel[0].title
        best_param=cvalidation.mvm_validation(xdatacls)

        print('Parameters:',best_param.c,best_param.d, \
              best_param.par1,best_param.par2)

        print('Best parameters found by validation')
        xbest_param[irepeat,ifold,0]=best_param.c
        xbest_param[irepeat,ifold,1]=best_param.d
        xbest_param[irepeat,ifold,2]=best_param.par1
        xbest_param[irepeat,ifold,3]=best_param.par2

    # training with the best parameters
        print('training')

        time0=time.time()
        cOptDual= xdatacls.mvm_train()
        # accumulate pure solver time for the summary table
        xsolvertime+=xdatacls.solvertime
        print('Training time:',time.time()-time0)
        sys.stdout.flush()

    # check the train accuracy
        print('test on training')

    # check the test accuracy
        print('test on test')
        time0=time.time()

#         xdatacls.xdata_tes=ctables.full_test()
#         xdatacls.xranges_rel_test=mvm_prepare.mvm_ranges(xdatacls.xdata_tes, \
#                                                xdatacls.nrow)
        
        cPredict=xdatacls.mvm_test()
        print('Test time:',time.time()-time0)
        sys.stdout.flush()

        ## ctables.export_prediction(cPredict.Zrow)

    # counts the proportion the ones predicted correctly
    # ####################################
        time0=time.time()
        # evaluation only makes sense when the test labels are known
        if xdatacls.knowntest==1:
          (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                            xdatacls.nrow, \
                                            xdatacls,cPredict.Zrow)
          print('Evaluation time:',time.time()-time0)
          ## (qtest,qpred,qpred0)=makearray(xdatacls,cPredict.Zrow)

          if xdatacls.ieval_type in (0,11):
            creport.set_xaprf(irepeat,ifold,cEval)
          elif xdatacls.ieval_type==10:
            creport.set_xaprf(irepeat,ifold,cEval)
            xconfusion3[irepeat,ifold]=cEval.xconfusion3
          else:
            creport.set_xaprf(irepeat,ifold,cEval)

          ## xdatacls.icandidate_w=xdatacls.itest[icandidate_w]
          ## xdatacls.icandidate_b=xdatacls.itest[icandidate_b]
          ireport+=1

          ## print(cEval.xconfusion)
          if xdatacls.ieval_type in (0,11):
            for xconfrow in cEval.xconfusion:
              for ditem in xconfrow:
                print('%7.0f'%ditem,end='')
              print()
            print()
          elif xdatacls.ieval_type==10:
            # print each confusion table normalized to percentages
            for xtable in cEval.xconfusion3:
              xsum=np.sum(xtable)
              if xsum==0:
                xsum=1
              xtable=100*xtable/xsum
              for xconfrow in xtable:
                for ditem in xconfrow:
                  print('%9.4f'%ditem,end='')
                print()
              print()
            print()

      # ####################################    
          print('*** ipar, repeatation, fold ***') 
          print(ipar,irepeat,ifold)
        
          if xdatacls.itestmode==1: ## n-fold crossvalidation

            creport.report_prf(xmask=[irepeat,ifold], \
                             stitle='Result in one fold and one repetation', \
                             ssubtitle='Accuracy on test')

      if xdatacls.knowntest==1:
        creport.report_prf(xmask=[irepeat,None], \
                         stitle='Result in one repetation', \
                         ssubtitle='Mean and std of the accuracy on test')

      sys.stdout.flush()


    if xdatacls.knowntest==1:
      (xmean,xstd)=creport.report_prf(xmask=[None,None], \
                     stitle='***** Overall result ****', \
                     ssubtitle='Mean and std of the accuracy on test + error')

      xsummary[ipar,0]=xmean[0]
      xsummary[ipar,1]=xsolvertime/(nrepeat0*nfold0)                          

    # two-file mode: export the test predictions; the file name records
    # which known/full tables were combined
    if iloadall==1:
      filename='predicted_missing'
      if iknown1==1:
        filename+='_'+ctables.listknown[ifile1]
      else:
        filename+='_'+ctables.listfull[ifile1]
      if iknown2==1:
        filename+='_'+ctables.listknown[ifile2]
      else:
        filename+='_'+ctables.listfull[ifile2]
      filename+='.csv'
      ctables.export_test_prediction(filename,xdatacls,cPredict.Zrow)

      ## (qtest,qpred,qpred0)=makearray(xdatacls,cPredict.Zrow)

    print('Average best parameters')
    xlabels=('c','d','par1','par2')
    for i in range(nparam):
      print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
              '(',np.std(xbest_param[:,:,i]),')')

  if xdatacls.knowntest==1:
    print('$$$$$$$$$ Summary results:')
    (m,n)=xsummary.shape
    for i in range(m):
      for j in range(n):
        print('%10.4f'%xsummary[i,j],end='')
      print()

  ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
  print('Bye')    
  
  return
    def mvm_validation_body(self, xdatacls):
        """Cross-validate the penalty and kernel parameters on the training set.

        The training part of ``xdatacls`` is copied into a fresh data
        object, split into ``self.vnfold`` random folds, and every
        combination (C, D, par1, par2) taken from the penalty and kernel
        ``crossval`` ranges is trained and tested on each fold.  The
        combination with the best mean validation score is returned.

        Input:
        xdatacls      data class
        params        global parameters

        Output:
        best_param    the best kernel parameters found by cross validation
                      on the split training
        """

        nrow = xdatacls.nrow

        ## construct the data object out of the training items
        xdatacls_val = mvm_mvm_cls.cls_mvm()
        xdatacls.copy(xdatacls_val)

        xparam = cls_empty_class()

        # default parameters; NOTE(review): these defaults are discarded by
        # ``best_param = xparam`` at the end, so they only matter if the
        # scan loops never run -- and then xparam has no attributes at all
        best_param = cls_empty_class()
        best_param.c = 1
        best_param.d = 0
        best_param.par1 = 0
        best_param.par2 = 0

        # kernel whose parameters are validated: the named one if present,
        # otherwise the first input kernel
        if self.validation_rkernel in xdatacls_val.dkernels:
            rkernel = xdatacls_val.dkernels[self.validation_rkernel]
        else:
            rkernel = xdatacls_val.XKernel[0]

        kernel_type = rkernel.kernel_params.kernel_type
        kinput = rkernel.crossval

        # build the (par1, par2) scan ranges; their meaning depends on the
        # kernel type
        if kernel_type == 0:
            # no kernel parameters to scan: single (0, 0) grid point
            ip1min = 0
            ip1max = 0
            ip2min = 0
            ip2max = 0
            ip1step = 1
            ip2step = 1
        elif kernel_type in (1, 2):
            # linear scan of both parameters over the crossval ranges
            ip1min = kinput.par1min
            ip1max = kinput.par1max
            ip2min = kinput.par2min
            ip2max = kinput.par2max
            ip1step = kinput.par1step
            ip2step = kinput.par2step
        elif kernel_type in (3, 31, 32, 41, 53, 5):
            # geometric scan of par1: ip1 runs over the exponent indexes
            # 1..nrange and dpar is the common ratio, so that dpar1 (set in
            # the scan loop below) sweeps [par1min, par1max]; par2 is still
            # scanned linearly
            if kinput.nrange > 1:
                if kinput.par1max > kinput.par1min:
                    dpar = np.power(kinput.par1max / kinput.par1min,
                                    1 / (kinput.nrange - 1))
                    ip1max = kinput.nrange
                else:
                    dpar = 1.0
                    ip1max = 1.0
            else:
                ip1max = 1.0
                dpar = 1.0

            ip1min = 1
            ip2min = kinput.par2min
            ip2max = kinput.par2max
            ip1step = 1
            ip2step = kinput.par2step
        else:
            # unrecognized kernel type: degenerate single (1, 1) grid point
            ip1min = 1
            ip1max = 1
            ip2min = 1
            ip2max = 1
            ip1step = 1
            ip2step = 1

    #  vnfold=4 # number of validation folds
        mdata = xdatacls_val.xdata_rel[0].shape[0]
        vnfold = self.vnfold  # number of validation folds
        # assign every relational item to a random fold in [0, vnfold);
        # the subtraction guards the edge case vxsel == vnfold
        vxsel = np.floor(np.random.rand(mdata) * vnfold)
        vxsel = vxsel - (vxsel == vnfold)
        ##  vpredtr=np.zeros(vnfold) # valid
        vpred = np.zeros(vnfold)  # per-fold validation score

        print('C,D,par1,par2,traning accuracy,validation test accuracy')

        # scanning the parameter space:
        # accuracy-like measures (ieval_type 0/10/11) are maximized, the
        # other evaluation measures are error-like and minimized
        if xdatacls_val.ieval_type in (0, 10, 11):
            xxmax = -np.inf
        else:
            xxmax = np.inf

        # penalty (C, D) grid; the +step/2 makes np.arange include the
        # upper endpoint
        penalty = xdatacls_val.penalty.crossval
        crange=np.arange(penalty.par1min,penalty.par1max+penalty.par1step/2, \
                         penalty.par1step)
        drange=np.arange(penalty.par2min,penalty.par2max+penalty.par2step/2, \
                         penalty.par2step)

        p1range = np.arange(ip1min, ip1max + ip1step / 2, ip1step)
        p2range = np.arange(ip2min, ip2max + ip2step / 2, ip2step)

        for iC in crange:
            for iD in drange:
                for ip1 in p1range:
                    for ip2 in p2range:
                        # map the grid indexes to actual kernel parameters
                        if kernel_type in (3, 31, 32, 41, 53, 5):
                            dpar1 = kinput.par1min * dpar**(ip1 - 1)
                            dpar2 = ip2
                        else:
                            dpar1 = ip1
                            dpar2 = ip2

                        # install the candidate parameters on the shared
                        # validation object (rkernel aliases into it)
                        xdatacls_val.penalty.c = iC
                        xdatacls_val.d = iD
                        rkernel.kernel_params.ipar1 = dpar1
                        rkernel.kernel_params.ipar2 = dpar2

                        for vifold in range(vnfold):

                            # hold out fold vifold, train on the rest
                            xdatacls_val.split_train_test(vxsel, vifold)
                            xdatacls_val.mvm_datasplit()
                            xdatacls_val.xranges_rel=mvm_ranges(xdatacls_val.xdata_tra, \
                                                             xdatacls_val.nrow)
                            xdatacls_val.xranges_rel_test=mvm_ranges(xdatacls_val.xdata_tes, \
                                                             xdatacls_val.nrow)
                            if xdatacls.category == 0 or xdatacls.category == 3:
                                ## pass
                                mvm_glm(xdatacls_val)
                                mvm_ygrid(xdatacls_val)
                            else:
                                mvm_largest_category(xdatacls_val)

                            if self.report == 1:
                                print('validation training')
                            xdatacls_val.mvm_train()

                            # validation test
                            if self.report == 1:
                                print('validation test on validation test')
                            cPredict = xdatacls_val.mvm_test()

                            # counts the proportion the ones predicted correctly
                            # ##############################################
                            cEval=mvm_eval(xdatacls_val.ieval_type,nrow,xdatacls_val, \
                                               cPredict.Zrow)[0]
                            # pick the score matching the evaluation mode;
                            # NOTE(review): if ieval_type is 0/10/11 and
                            # ibinary is neither 0 nor 1, the stale fold
                            # value is silently kept -- confirm intent
                            if xdatacls_val.ieval_type in (0, 10, 11):
                                if xdatacls_val.ibinary == 0:
                                    vpred[vifold] = cEval.accuracy
                                elif xdatacls_val.ibinary == 1:
                                    vpred[vifold] = cEval.f1
                            else:
                                vpred[vifold] = cEval.deval

                        print('%9.5g'%iC,'%9.5g'%iD,'%9.5g'%dpar1,'%9.5g'%dpar2, \
                              '%9.5g'%(np.mean(vpred)))
                        ## print(iC,iD,dpar1,dpar2,np.mean(vpred))
                        # searching for the best configuration in validation
                        mvpred = np.mean(vpred)

                        if xdatacls_val.ieval_type in (0, 10, 11):
                            if mvpred > xxmax:
                                xxmax = mvpred
                                xparam.c = iC
                                xparam.d = iD
                                xparam.par1 = dpar1
                                xparam.par2 = dpar2
                                print('The best:', xxmax)
                        else:
                            if mvpred < xxmax:
                                xxmax = mvpred
                                xparam.c = iC
                                xparam.d = iD
                                xparam.par1 = dpar1
                                xparam.par2 = dpar2
                                print('The best:', xxmax)

                        # flush so progress is visible when stdout is buffered
                        sys.stdout.flush()

        best_param = xparam

        return (best_param)
Exemple #8
0
  def mvm_validation_body(self,xdatacls):
    """Cross-validate the penalty and kernel parameters on the training set.

    The training part of xdatacls is copied into a fresh data object,
    split into self.vnfold random folds, and every combination
    (C, D, par1, par2) taken from the penalty and kernel crossval ranges
    is trained and tested on each fold.  The combination with the best
    mean validation score is returned.

    Input:
    xdatacls      data class
    params        global parameters

    Output:
    best_param    the best kernel parameters found by cross validation
                  on the split training
    """

    nrow=xdatacls.nrow

    ## construct the data object out of the training items
    xdatacls_val=mvm_mvm_cls.cls_mvm()
    xdatacls.copy(xdatacls_val)

    # seed xparam with the defaults so the returned object always carries
    # the c/d/par1/par2 attributes, even when a scan range is empty and
    # the grid loops below never assign them
    xparam=cls_empty_class()
    xparam.c=1
    xparam.d=0
    xparam.par1=0
    xparam.par2=0

    best_param=cls_empty_class()
    best_param.c=1
    best_param.d=0
    best_param.par1=0
    best_param.par2=0

    # kernel whose parameters are validated: the named one if present,
    # otherwise the first input kernel
    if self.validation_rkernel in xdatacls_val.dkernels:
      rkernel=xdatacls_val.dkernels[self.validation_rkernel]
    else:
      rkernel=xdatacls_val.XKernel[0]

    kernel_type=rkernel.kernel_params.kernel_type
    kinput=rkernel.crossval

    # build the (par1,par2) scan ranges; their meaning depends on the
    # kernel type
    if kernel_type==0:
      # no kernel parameters to scan: single (0,0) grid point
      ip1min=0
      ip1max=0
      ip2min=0
      ip2max=0
      ip1step=1
      ip2step=1
    elif kernel_type in (1,2):
      # linear scan of both parameters over the crossval ranges
      ip1min=kinput.par1min
      ip1max=kinput.par1max
      ip2min=kinput.par2min
      ip2max=kinput.par2max
      ip1step=kinput.par1step
      ip2step=kinput.par2step
    elif kernel_type in (3,31,32,41,53,5):
      # geometric scan of par1: ip1 runs over the exponent indexes
      # 1..nrange and dpar is the common ratio, so that dpar1 (set in the
      # scan loop below) sweeps [par1min,par1max]; par2 stays linear
      if kinput.nrange>1:
        if kinput.par1max>kinput.par1min:
          dpar=np.power(kinput.par1max/kinput.par1min,1/(kinput.nrange-1))
          ip1max=kinput.nrange
        else:
          dpar=1.0
          ip1max=1.0
      else:
        ip1max=1.0
        dpar=1.0

      ip1min=1
      ip2min=kinput.par2min
      ip2max=kinput.par2max
      ip1step=1
      ip2step=kinput.par2step
    else:
      # unrecognized kernel type: degenerate single (1,1) grid point
      ip1min=1
      ip1max=1
      ip2min=1
      ip2max=1
      ip1step=1
      ip2step=1

    mdata=xdatacls_val.xdata_rel[0].shape[0]
    vnfold=self.vnfold # number of validation folds
    # assign every relational item to a random fold in [0,vnfold); the
    # subtraction guards the edge case vxsel==vnfold
    vxsel=np.floor(np.random.rand(mdata)*vnfold)
    vxsel=vxsel-(vxsel==vnfold)
    vpred=np.zeros(vnfold) # per-fold validation score

    print('C,D,par1,par2,training accuracy,validation test accuracy')

    # scanning the parameter space:
    # accuracy-like measures (ieval_type 0/10/11) are maximized, the other
    # evaluation measures are error-like and minimized
    if xdatacls_val.ieval_type in (0,10,11):
      xxmax=-np.inf
    else:
      xxmax=np.inf

    # penalty (C,D) grid; the +step/2 makes np.arange include the endpoint
    penalty=xdatacls_val.penalty.crossval
    crange=np.arange(penalty.par1min,penalty.par1max+penalty.par1step/2, \
                     penalty.par1step)
    drange=np.arange(penalty.par2min,penalty.par2max+penalty.par2step/2, \
                     penalty.par2step)

    p1range=np.arange(ip1min,ip1max+ip1step/2,ip1step)
    p2range=np.arange(ip2min,ip2max+ip2step/2,ip2step)

    for iC in crange:
      for iD in drange:
        for ip1 in p1range:
          for ip2 in p2range:
            # map the grid indexes to actual kernel parameters
            if kernel_type in (3,31,32,41,53,5):
              dpar1=kinput.par1min*dpar**(ip1-1)
              dpar2=ip2
            else:
              dpar1=ip1
              dpar2=ip2

            # install the candidate parameters on the shared validation
            # object (rkernel aliases into it)
            xdatacls_val.penalty.c=iC
            xdatacls_val.d=iD
            rkernel.kernel_params.ipar1=dpar1
            rkernel.kernel_params.ipar2=dpar2

            for vifold in range(vnfold):

              # hold out fold vifold, train on the rest
              xdatacls_val.split_train_test(vxsel,vifold)
              xdatacls_val.mvm_datasplit()
              xdatacls_val.xranges_rel=mvm_ranges(xdatacls_val.xdata_tra, \
                                               xdatacls_val.nrow)
              xdatacls_val.xranges_rel_test=mvm_ranges(xdatacls_val.xdata_tes, \
                                               xdatacls_val.nrow)
              if xdatacls.category==0 or xdatacls.category==3:
                mvm_glm(xdatacls_val)
                mvm_ygrid(xdatacls_val)
              else:
                mvm_largest_category(xdatacls_val)

              if self.report==1:
                print('validation training')
              xdatacls_val.mvm_train()

              # validation test
              if self.report==1:
                print('validation test on validation test')
              cPredict=xdatacls_val.mvm_test()

              # evaluate the prediction on the held-out fold
              cEval=mvm_eval(xdatacls_val.ieval_type,nrow,xdatacls_val, \
                                 cPredict.Zrow)[0]
              # pick the score matching the evaluation mode
              if xdatacls_val.ieval_type in (0,10,11):
                if xdatacls_val.ibinary==0:
                  vpred[vifold]=cEval.accuracy
                elif xdatacls_val.ibinary==1:
                  vpred[vifold]=cEval.f1
              else:
                vpred[vifold]=cEval.deval

            print('%9.5g'%iC,'%9.5g'%iD,'%9.5g'%dpar1,'%9.5g'%dpar2, \
                  '%9.5g'%(np.mean(vpred)))
            # searching for the best configuration in validation
            mvpred=np.mean(vpred)

            if xdatacls_val.ieval_type in (0,10,11):
              if mvpred>xxmax:
                xxmax=mvpred
                xparam.c=iC
                xparam.d=iD
                xparam.par1=dpar1
                xparam.par2=dpar2
                print('The best:',xxmax)
            else:
              if mvpred<xxmax:
                xxmax=mvpred
                xparam.c=iC
                xparam.d=iD
                xparam.par1=dpar1
                xparam.par2=dpar2
                print('The best:',xxmax)

            # flush so progress is visible when stdout is buffered
            sys.stdout.flush()

    best_param=xparam

    return best_param