def plotCValues(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
            c1_g='',model_g='mlp',true_dist=False,vars_g=None,
            workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
            use_log=False):
  '''
    Plot histograms comparing the true and decomposed ("composed") fitted
    values of the signal weight (c1) and background weight (c2).

    Reads {dir}/fitting_values_c1c2{post}.txt, whose four columns are
    (c1 true, c1 decomposed, c2 true, c2 decomposed), and saves one
    histogram figure per coefficient via saveFig.

    Only c1 (true coefficient values, used as histogram markers), dir,
    model_g and use_log are read here; the remaining parameters keep the
    signature uniform with the other plotting helpers in this module.
  '''
  # The log-likelihood fit writes its results with a 'log' suffix
  post = 'log' if use_log else ''

  # Load fitted values; the pre-allocated zero arrays of the original were
  # dead code (immediately overwritten by these columns).
  c1_2 = np.loadtxt('{0}/fitting_values_c1c2{1}.txt'.format(dir,post))
  c1_values = {'true': c1_2[:,0], 'dec': c1_2[:,1]}
  c2_values = {'true': c1_2[:,2], 'dec': c1_2[:,3]}

  saveFig([],[c1_values['true'],c1_values['dec']], 
      makePlotName('c1c2','train',type='c1_hist{0}'.format(post)),hist=True, 
      axis=['signal weight'],marker=True,marker_value=c1[0],
      labels=['true','composed'],x_range=[0.,0.2],dir=dir,
      model_g=model_g,title='Histogram for estimated values signal weight',print_pdf=True)
  saveFig([],[c2_values['true'],c2_values['dec']], 
      makePlotName('c1c2','train',type='c2_hist{0}'.format(post)),hist=True, 
      axis=['bkg. weight'],marker=True,marker_value=c1[1],
      labels=['true','composed'],x_range=[0.1,0.4],dir=dir,
      model_g=model_g,title='Histogram for estimated values bkg. weight',print_pdf=True)
# Ejemplo n.º 2
# 0
def plotCValues(c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
            c1_g='',model_g='mlp',true_dist=False,vars_g=None,
            workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
            use_log=False, n_hist=150,c_eval=0, range_min=-1.0,range_max=0.):
  '''
    Plot a 2D histogram of the fitted (g1, g2) coupling values.

    Reads {dir}/fitting_values_c1.txt (column 0: true g1, column 1: fitted
    g1, column 3: fitted g2) and saves a 2D histogram via saveFig with the
    true couplings c1 as marker.

    Review fix: vals1 (fitted g2) was previously only defined in the
    else-branch, raising NameError when true_dist was True; it is now
    computed unconditionally. The unused `size` computation (which also
    crashed on the true_dist path) was removed.
  '''
  post = 'log' if use_log else ''

  keys = ['true','dec']
  c1_ = dict((key,np.zeros(n_hist)) for key in keys)
  c1_1 = np.loadtxt('{0}/fitting_values_c1.txt'.format(dir))  
  c1_['true'] = c1_1[:,0]
  c1_['dec'] = c1_1[:,1]
  # Fitted g2 values, needed by the 2D histogram in both branches
  vals1 = c1_1[:,3]
  if true_dist:
    vals = [c1_['true'],c1_['dec']]
    labels = ['true','dec']
  else:
    vals = c1_['dec']
    labels = ['dec']
  saveFig([],[vals,vals1], 
      makePlotName('g1g2','train',type='hist'),hist=True,hist2D=True, 
      axis=['g1','g2'],marker=True,marker_value=c1,
      labels=labels,dir=dir,model_g=model_g,title='2D Histogram for fitted g1,g2', print_pdf=True,
      x_range=[[0.5,1.4],[1.1,1.9]])
# Ejemplo n.º 3
# 0
def CrossSectionCheck2D(dir,c1_g,model_g,data_files,f1_dist,accept_list,c_min,c_max,npoints,n_eval,feature):
  ''' 
    2D likelihood plots for a single feature.

    Scans an npoints x npoints grid over [c_min[0],c_max[0]] x
    [c_min[1],c_max[1]], evaluates checkCrossSection at each grid point
    using the pre-computed basis indexes, couplings and cross sections
    stored in the 3*.dat scan files, and saves a pixel/contour plot of the
    resulting likelihood values.
  '''
  # Grids of coupling values for both scanned parameters
  csarray = np.linspace(c_min[0],c_max[0],npoints)
  csarray2 = np.linspace(c_min[1], c_max[1], npoints)

  # Pre-computed scan inputs, keyed by scan range and resolution.
  # astype(int) replaces the element-wise Python int() loop.
  all_indexes = np.loadtxt('3indexes_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(c_min[0],c_min[1],c_max[0],c_max[1],npoints)).astype(int)
  all_couplings = np.loadtxt('3couplings_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(c_min[0],c_min[1],c_max[0],c_max[1],npoints)) 
  all_cross_sections = np.loadtxt('3crosssection_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(c_min[0],c_min[1],c_max[0],c_max[1],npoints))

  # NOTE(review): data paths hardcode 'mlp' instead of using model_g —
  # confirm this is intentional before changing it.
  basis_files = [data_files[i] for i in all_indexes]
  samplesdata = []
  data_file='data'
  for sample in basis_files:
    samplesdata.append(np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,'mlp',c1_g,data_file,sample)))

  print(all_indexes)
  targetdata = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,'mlp',c1_g,data_file,f1_dist))
 
  likelihoods = np.zeros((npoints,npoints))
  n_effs = np.zeros((npoints,npoints))
  n_zeros = np.zeros((npoints,npoints))

  # Grid scan; couplings/cross sections are stored row-major (k*npoints+j)
  for k,cs in enumerate(csarray):
    for j,cs2 in enumerate(csarray2):
      likelihood,n_eff,n_zero = checkCrossSection(all_couplings[k*npoints+j],all_cross_sections[k*npoints + j],basis_files,f1_dist,
              dir,c1_g,model_g,feature=feature,targetdata=targetdata,samplesdata=samplesdata)
      likelihoods[k,j] = likelihood
      n_effs[k,j] = n_eff
      n_zeros[k,j] = n_zero
  saveFig(csarray,[csarray2,likelihoods],makePlotName('feature{0}'.format(25),'train',type='pixel_g1g2'),labels=['composed'],pixel=True,marker=True,dir=dir,model_g=model_g,marker_value=(1.0,0.5),print_pdf=True,contour=True,title='Feature for g1,g2')
def fit(input_workspace,dir,model_g='mlp',c1_g='breast',data_file='data',
      model_file='train',verbose_printing=True):
  '''
    Build RooFit score histograms for the full F0-vs-F1 classifier.

    Loads (or creates) the RooWorkspace 'w', evaluates the trained
    classifier on the training data split by target label, stores
    normalized score histograms and RooHistFunc densities for signal and
    background in the workspace, optionally plots them, and writes the
    workspace back to {dir}/{input_workspace}.
  '''

  bins = 80
  low = 0.
  high = 1.  
  
  if input_workspace is not None:
    f = ROOT.TFile('{0}/{1}'.format(dir,input_workspace))
    w = f.Get('w')
    # TODO test this when workspace is present
    # '== None' (not 'is None') on purpose: PyROOT null object proxies
    # compare equal to None but are not the None singleton.
    w = ROOT.RooWorkspace('w') if w == None else w
    f.Close()
  else: 
    w = ROOT.RooWorkspace('w')
  w.Print()

  print('Generating Score Histograms')

  w.factory('score[{0},{1}]'.format(low,high))
  s = w.var('score')
  
  def saveHisto(w,outputs,s,bins,low,high,k='F0',j='F1'):
    # Fill a normalized TH1F plus RooDataSet/RooDataHist/RooHistFunc for
    # the signal (outputs[0]) and background (outputs[1]) score samples and
    # import them into the workspace as {sig,bkg}*_{k}_{j}.
    print('Estimating {0} {1}'.format(k,j))
    for l,name in enumerate(['sig','bkg']):
      data = ROOT.RooDataSet('{0}data_{1}_{2}'.format(name,k,j),"data",
          ROOT.RooArgSet(s))
      hist = ROOT.TH1F('{0}hist_{1}_{2}'.format(name,k,j),'hist',bins,low,high)
      values = outputs[l]
      for val in values:
        hist.Fill(val)
        s.setVal(val)
        data.add(ROOT.RooArgSet(s))
      # Normalize so the histogram is a density estimate
      norm = 1./hist.Integral()
      hist.Scale(norm) 
        
      s.setBins(bins)
      datahist = ROOT.RooDataHist('{0}datahist_{1}_{2}'.format(name,k,j),'hist',
            ROOT.RooArgList(s),hist)
      histpdf = ROOT.RooHistFunc('{0}histpdf_{1}_{2}'.format(name,k,j),'hist',
            ROOT.RooArgSet(s), datahist, 1)

      # getattr because 'import' is a Python keyword
      getattr(w,'import')(hist)
      getattr(w,'import')(data)
      getattr(w,'import')(datahist) # work around for morph = w.import(morph)
      getattr(w,'import')(histpdf) # work around for morph = w.import(morph)

  # Full model: last column of the training file is the target label
  data = np.loadtxt('{0}/train_{1}.dat'.format(dir,data_file)) 
  traindata = data[:,:-1]
  targetdata = data[:,-1]

  outputs = [predict('/afs/cern.ch/work/j/jpavezse/private/transfer_learning/{0}_F0_F1.pkl'.format(model_file),traindata[targetdata==1],model_g=model_g),
            predict('/afs/cern.ch/work/j/jpavezse/private/transfer_learning/{0}_F0_F1.pkl'.format(model_file),traindata[targetdata==0],model_g=model_g)]

  saveHisto(w,outputs,s, bins, low, high)

  if verbose_printing == True:
    printFrame(w,['score'],[w.function('sighistpdf_F0_F1'),w.function('bkghistpdf_F0_F1')], makePlotName('full','all',type='hist',dir=dir,c1_g=c1_g,model_g=model_g),['signal','bkg'],
  dir=dir,model_g=model_g,y_text='score(x)',print_pdf=True,title='Pairwise score distributions')
 
  # NOTE(review): if input_workspace is None this writes to a file named
  # 'None' — confirm callers always pass a workspace name.
  w.writeToFile('{0}/{1}'.format(dir,input_workspace))
  w.Print()
def computeRatios(workspace,data_file,model_file,dir,model_g,c1_g,true_dist=False,
      vars_g=None):
  '''
    Use the computed score densities to compute 
    the ratio test.

    Loads the workspace with the trained F0/F1 score densities, evaluates
    the density ratio on the training data, builds per-class ratio
    histograms in the workspace, plots signal/background rejection curves,
    and finally reweights ("transfers") the f1 feature distributions into
    f0 using the computed ratios.
  '''

  # Load the workspace holding the trained score densities
  f = ROOT.TFile('{0}/{1}'.format(dir,workspace))
  w = f.Get('w')
  f.Close()
  

  print 'Calculating ratios'

  npoints = 50

  score = ROOT.RooArgSet(w.var('score'))
  getRatio = singleRatio

  # Collect the observable variables only when true pdfs are available
  if true_dist == True:
    vars = ROOT.TList()
    for var in vars_g:
      vars.Add(w.var(var))
    x = ROOT.RooArgSet(vars)

  # NN trained on complete model
  F0pdf = w.function('bkghistpdf_F0_F1')
  F1pdf = w.function('sighistpdf_F0_F1')
  # Last column of the training file is the target label
  data = np.loadtxt('{0}/train_{1}.dat'.format(dir,data_file)) 
  testdata = data[:,:-1]
  testtarget = data[:,-1]

  '''
  # Make ratio considering tumor size unknown
  ts_idx = 2
  target = testdata[0]
  testdata_size = np.array([x for x in testdata if (np.delete(x,ts_idx) == np.delete(target,ts_idx)).all()])
  '''

  # 1D case with known true pdfs: compare the trained ratio to the truth
  if true_dist == True and len(vars_g) == 1:
      xarray = np.linspace(1,10,npoints)
      # TODO: Harcoded dist names
      F1dist = np.array([evalDist(x,w.pdf('f1'),[xs]) for xs in xarray])
      F0dist = np.array([evalDist(x,w.pdf('f0'),[xs]) for xs in xarray])
      trueRatio = getRatio(F1dist, F0dist)

      outputs = predict('{0}/{1}_F0_F1.pkl'.format(dir,model_file),xarray,model_g=model_g)

      F1fulldist = np.array([evalDist(score,F1pdf,[xs]) for xs in outputs])
      F0fulldist = np.array([evalDist(score,F0pdf,[xs]) for xs in outputs])

      completeRatio = getRatio(F0fulldist,F1fulldist)

      saveFig(xarray, [completeRatio, trueRatio], makePlotName('all','train',type='ratio'),title='Density Ratios',labels=['Trained', 'Truth'], print_pdf=True,dir=dir)
  
  # Ratio on the full test sample using the trained score densities
  outputs = predict('{0}/{1}_F0_F1.pkl'.format(dir,model_file),testdata,model_g=model_g)

  F1fulldist = np.array([evalDist(score,F1pdf,[xs]) for xs in outputs])
  F0fulldist = np.array([evalDist(score,F0pdf,[xs]) for xs in outputs])

  completeRatio = getRatio(F1fulldist,F0fulldist)
  complete_target = testtarget
  #Histogram F0-f0 for composed, full and true

  # Removing outliers
  numtest = completeRatio.shape[0]
  #decomposedRatio[decomposedRatio < 0.] = completeRatio[decomposedRatio < 0.]

  # complete_outliers is a boolean mask selecting non-outlier entries;
  # target and ratio are filtered consistently.
  complete_outliers = np.zeros(numtest,dtype=bool)
  complete_outliers = findOutliers(completeRatio)
  complete_target = testtarget[complete_outliers] 
  completeRatio = completeRatio[complete_outliers]

  bins = 70
  low = 0.6
  high = 1.2

  # Build per-class (sig/bkg) ratio histograms in the workspace
  for l,name in enumerate(['sig','bkg']):
    minimum = completeRatio[complete_target == 1-l].min() 
    maximum = completeRatio[complete_target == 1-l].max()

    # Pad the histogram range by 10 bin-widths on each side
    low = minimum - ((maximum - minimum) / bins)*10
    high = maximum + ((maximum - minimum) / bins)*10
    w.factory('ratio{0}[{1},{2}]'.format(name, low, high))
    ratio_var = w.var('ratio{0}'.format(name))

    numtest = completeRatio.shape[0] 
    hist = ROOT.TH1F('{0}hist_F0_f0'.format(name),'hist',bins,low,high)
    for val in completeRatio[complete_target == 1-l]:
      hist.Fill(val)
    datahist = ROOT.RooDataHist('{0}datahist_F0_f0'.format(name),'hist',
          ROOT.RooArgList(ratio_var),hist)
    ratio_var.setBins(bins)
    histpdf = ROOT.RooHistFunc('{0}histpdf_F0_f0'.format(name),'hist',
          ROOT.RooArgSet(ratio_var), datahist, 0)

    histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator')
    getattr(w,'import')(hist)
    getattr(w,'import')(datahist) # work around for morph = w.import(morph)
    getattr(w,'import')(histpdf) # work around for morph = w.import(morph)
    #print '{0} {1} {2}'.format(curr,name,hist.Integral())

    # 'bkg' is the last iteration, so both histpdfs exist at this point
    if name == 'bkg':
      all_ratios_plots = [w.function('sighistpdf_F0_f0'),
            w.function('bkghistpdf_F0_f0')]
      all_names_plots = ['sig','bkg']
    
  printFrame(w,['ratiosig','ratiobkg'],all_ratios_plots, makePlotName('ratio','comparison',type='hist',dir=dir,model_g=model_g,c1_g=c1_g),all_names_plots,dir=dir,model_g=model_g,y_text='Count',title='Histograms for ratios',x_text='ratio value',print_pdf=True)

  #completeRatio = np.log(completeRatio)
  # Shift and scale the ratios into [0,1] for the rejection curves
  completeRatio = completeRatio + np.abs(completeRatio.min())
  ratios_list = completeRatio / completeRatio.max()
  legends_list = ['composed','full']
  makeSigBkg([ratios_list],[complete_target],makePlotName('comp','all',type='sigbkg',dir=dir,model_g=model_g,c1_g=c1_g),dir=dir,model_g=model_g,print_pdf=True,legends=legends_list,title='Signal-Background rejection curves')

  # Make transfer learning

  data = np.loadtxt('{0}/train_{1}.dat'.format(dir,data_file)) 
  # Transforming f1 into f0
  data_f1 = data[data[:,-1] == 0.]
  data_f0 = data[data[:,-1] == 1.]
  testdata = data_f1[:,:-1]
  testtarget = data_f1[:,-1]

  '''
  # Make ratio considering tumor size unknown
  ts_idx = 2
  target = testdata[0]
  testdata_size = np.array([x for x in testdata if (np.delete(x,ts_idx) == np.delete(target,ts_idx)).all()])
  pdb.set_trace()
  '''

  xarray = testdata

  outputs = predict('{0}/{1}_F0_F1.pkl'.format(dir,model_file),xarray,model_g=model_g)

  F1fulldist = np.array([evalDist(score,F1pdf,[xs]) for xs in outputs])
  F0fulldist = np.array([evalDist(score,F0pdf,[xs]) for xs in outputs])

  # Note the inverted order (F0/F1) compared to the ratio computed above
  completeRatio = getRatio(F0fulldist,F1fulldist)

  # NOTE(review): x is only defined when true_dist is True; this section
  # appears to assume the true pdfs f0/f1 exist — confirm with callers.
  if len(vars_g) == 1:
    F1dist = np.array([evalDist(x,w.pdf('f1'),[xs]) for xs in xarray])
    F0dist = np.array([evalDist(x,w.pdf('f0'),[xs]) for xs in xarray])
  else:
    F1dist = np.array([evalDist(x,w.pdf('f1'),xs) for xs in xarray])
    F0dist = np.array([evalDist(x,w.pdf('f0'),xs) for xs in xarray])

  trueRatio = getRatio(F1dist, F0dist)

  trueIndexes = findOutliers(trueRatio)
  completeIndexes = findOutliers(completeRatio)
  #indexes = np.logical_and(trueIndexes,completeIndexes)
  indexes = completeIndexes
  data_f1_red = data_f1
  #trueRatio = trueRatio[indexes]
  #completeRatio = completeRatio[indexes]
  #data_f1_red = data_f1[indexes]


  # Plot ratio-reweighted ("transfered") feature distributions; the first
  # 10 features are hardcoded here.
  for f in range(10):
    feature = f
    # Transfering distributions
    # Doing histogram manipulation
    fig,ax = plt.subplots()
    colors = ['b-','r-','k-']
    colors_rgb = ['blue','red','black']
    
    hist,bins = np.histogram(data_f1[:,feature],bins=20, range=(0.,10.),density=True)


    # Same binning, weighted by the true and by the trained ratio
    hist_transfered,bins_1 = np.histogram(data_f1_red[:,feature],weights=trueRatio,bins=20, range=(0.,10.),density=True)
    hist_transfered_clf,bins_2 = np.histogram(data_f1_red[:,feature],bins=20,weights=completeRatio, range=(0.,10.),density=True)
    hist0,bins0 = np.histogram(data_f0[:,feature], bins=20, range=(0.,10.),density=True)

    #hist, bins =  ax.hist(data_f0[:,0],color=colors_rgb[0],label='true',bins=50,histtype='stepfilled',normed=1, alpha=0.5,range=[0,100]) 

    widths = np.diff(bins)
    #hist_transfered = hist*trueRatio
    #hist_transfered_clf = hist*completeRatio

    ax.bar(bins[:-1], hist0,widths,label='f0',alpha=0.5,color='red')
    #ax.bar(bins[:-1], hist_transfered,widths,label='f1 transfered (true)',
    #    alpha=0.5,color='blue')
    ax.bar(bins[:-1], hist_transfered_clf,widths,label='f1 transfered (trained)',
        alpha=0.5,color='green')

    ax.legend(frameon=False,fontsize=11)
    ax.set_xlabel('x') 
    ax.set_ylabel('p(x)') 
    if len(vars_g) > 1:
      ax.set_title('Transfered distributions feature {0}'.format(feature))
    else:
      ax.set_title('Transfered distributions')
    file_plot =  makePlotName('all','transf',type='hist_v{0}'.format(feature),model_g=model_g) 
    fig.savefig('{0}/plots/{1}/{2}.png'.format(dir,model_g,file_plot))
def evalC1C2Likelihood(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
            workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
            c1_g='',model_g='mlp',use_log=False,true_dist=False,vars_g=None,clf=None,
            verbose_printing=False):
  '''
    Scan the decomposed likelihood over a 25x25 grid of the first two
    mixture coefficients, with the third fixed by the unit-sum constraint
    c1[2] = 1 - c1[0] - c1[1].

    Per-pair classifier score densities (and true pdfs when true_dist) are
    pre-evaluated once outside the scan since they do not depend on the
    coefficients. Returns [[true fit c1[0..1]],[decomposed fit c1[0..1]]];
    the true entry is [0.,0.] when true_dist is False.
  '''
  f = ROOT.TFile('{0}/{1}'.format(dir,workspace))
  w = f.Get('w')
  f.Close()
  if true_dist == True:
    vars = ROOT.TList()
    for var in vars_g:
      vars.Add(w.var(var))
    x = ROOT.RooArgSet(vars)
  else:
    x = None

  score = ROOT.RooArgSet(w.var('score'))
  if use_log == True:
    evaluateRatio = test.evaluateLogDecomposedRatio
    post = 'log'
  else:
    evaluateRatio = test.evaluateDecomposedRatio
    post = ''

  npoints = 25
  csarray = np.linspace(0.01,0.2,npoints)
  cs2array = np.linspace(0.1,0.4,npoints)
  testdata = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,model_g,c1_g,'test','F1'))

  decomposedLikelihood = np.zeros((npoints,npoints))
  trueLikelihood = np.zeros((npoints,npoints))
  c1s = np.zeros(c1.shape[0])
  # Pre-evaluate the pairwise classifier densities and true distributions
  pre_pdf = [[],[]]
  pre_dist = [[],[]]
  for k,c0_ in enumerate(c0):
    pre_pdf[0].append([])
    pre_pdf[1].append([])
    pre_dist[0].append([])
    pre_dist[1].append([])
    for j,c1_ in enumerate(c1):
      if k != j:
        f0pdf = w.function('bkghistpdf_{0}_{1}'.format(k,j))
        f1pdf = w.function('sighistpdf_{0}_{1}'.format(k,j))
        outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g,
        'adaptive',k,j),testdata,model_g=model_g,clf=clf)
        f0pdfdist = np.array([test.evalDist(score,f0pdf,[xs]) for xs in outputs])
        f1pdfdist = np.array([test.evalDist(score,f1pdf,[xs]) for xs in outputs])
        pre_pdf[0][k].append(f0pdfdist)
        pre_pdf[1][k].append(f1pdfdist)
      else:
        # Diagonal pairs (same distribution) have no trained classifier
        pre_pdf[0][k].append(None)
        pre_pdf[1][k].append(None)
      if true_dist == True:
        f0 = w.pdf('f{0}'.format(k))
        f1 = w.pdf('f{0}'.format(j))
        if len(testdata.shape) > 1:
          f0dist = np.array([test.evalDist(x,f0,xs) for xs in testdata])
          f1dist = np.array([test.evalDist(x,f1,xs) for xs in testdata])
        else:
          f0dist = np.array([test.evalDist(x,f0,[xs]) for xs in testdata])
          f1dist = np.array([test.evalDist(x,f1,[xs]) for xs in testdata])
        pre_dist[0][k].append(f0dist) 
        pre_dist[1][k].append(f1dist) 
  
  # Evaluate Likelihood in different c1[0] and c1[1] values
  for i,cs in enumerate(csarray):
    for j, cs2 in enumerate(cs2array):
      c1s[:] = c1[:]
      c1s[0] = cs
      c1s[1] = cs2
      c1s[2] = 1.-cs-cs2
      decomposedRatios,trueRatios = evaluateRatio(w,testdata,
      x=x,plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,
      pre_evaluation=pre_pdf,
      pre_dist=pre_dist)

      # NOTE(review): the class method accumulates -log(ratio); here the
      # sign is positive — confirm the argmin below matches the intent.
      if use_log == False:
        decomposedLikelihood[i,j] = np.log(decomposedRatios).sum()
        trueLikelihood[i,j] = np.log(trueRatios).sum()
      else:
        decomposedLikelihood[i,j] = decomposedRatios.sum()
        trueLikelihood[i,j] = trueRatios.sum()

  decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
  X,Y = np.meshgrid(csarray, cs2array)
  decMin = np.unravel_index(decomposedLikelihood.argmin(), decomposedLikelihood.shape)
  min_value = [csarray[decMin[0]],cs2array[decMin[1]]]
  if verbose_printing == True:
      saveFig(X,[Y,decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type='multilikelihood'),labels=['composed','true'],contour=True,marker=True,dir=dir,marker_value=(c1[0],c1[1]),print_pdf=True,min_value=min_value)
  if true_dist == True:
    trueLikelihood = trueLikelihood - trueLikelihood.min() 
    trueMin = np.unravel_index(trueLikelihood.argmin(), trueLikelihood.shape)
    return [[csarray[trueMin[0]],cs2array[trueMin[1]]], [csarray[decMin[0]],cs2array[decMin[1]]]]
  else:
    return [[0.,0.],[csarray[decMin[0]],cs2array[decMin[1]]]]
# Ejemplo n.º 7
# 0
  def evalC1C2Likelihood(self,w,testdata,c0,c1,c_eval=0,c_min=0.01,c_max=0.2,use_log=False,true_dist=False, vars_g=None, npoints=50,samples_ids=None,weights_func=None):
    '''
      Scan the decomposed likelihood over an npoints x npoints grid of
      coupling values in [c_min[0],c_max[0]] x [c_min[1],c_max[1]] and
      return the grid point minimizing it.

      Returns [[true_g1,true_g2],[dec_g1,dec_g2]]; the true entry is
      [0.,0.] when true_dist is False.

      Review fixes: removed a leftover pdb.set_trace(); the true_dist
      plot name referenced the undefined name n_sample (NameError) and
      now omits it; the true_dist return value used malformed indexing
      (csarray2[decMin[0],csarray2[decMin[1]]]) and is repaired.
    '''
    if true_dist == True:
      vars = ROOT.TList()
      for var in vars_g:
        vars.Add(w.var(var))
      x = ROOT.RooArgSet(vars)
    else:
      x = None

    score = ROOT.RooArgSet(w.var('score'))
    if use_log == True:
      evaluateRatio = self.evaluateLogDecomposedRatio
      post = 'log'
    else:
      evaluateRatio = self.evaluateDecomposedRatio
      post = ''

    csarray = np.linspace(c_min[0],c_max[0],npoints)
    csarray2 = np.linspace(c_min[1], c_max[1], npoints)
    decomposedLikelihood = np.zeros((npoints,npoints))
    trueLikelihood = np.zeros((npoints,npoints))
    c1s = np.zeros(c0.shape[0])
    # Pre-evaluate the pairwise classifier score densities (and true pdfs)
    # once, since they do not depend on the scanned coupling values.
    pre_pdf = [[],[]]
    pre_dist = [[],[]]
    for k,c0_ in enumerate(c0):
      pre_pdf[0].append([])
      pre_pdf[1].append([])
      pre_dist[0].append([])
      pre_dist[1].append([])
      for j,c1_ in enumerate(c0):
        # Map local pair (k,j) to the workspace/model indexes of the basis
        index_k,index_j = (self.basis_indexes[k],self.basis_indexes[j])
        if k != j:
          f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j))
          f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j))
          data = testdata
          if self.preprocessing == True:
            data = preProcessing(testdata,self.dataset_names[min(index_k,index_j)],
            self.dataset_names[max(index_k,index_j)],self.scaler) 
          outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,
          self.c1_g,self.model_file,index_k,index_j),data,model_g=self.model_g, clf=self.clf)
          f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
          f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
          pre_pdf[0][k].append(f0pdfdist)
          pre_pdf[1][k].append(f1pdfdist)
        else:
          # Diagonal pairs have no trained classifier
          pre_pdf[0][k].append(None)
          pre_pdf[1][k].append(None)
        if true_dist == True:
          f0 = w.pdf('f{0}'.format(k))
          f1 = w.pdf('f{0}'.format(j))
          if len(testdata.shape) > 1:
            f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata])
            f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata])
          else:
            f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata])
            f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata])
          pre_dist[0][k].append(f0dist)
          pre_dist[1][k].append(f1dist)
    ratiosList = []
    # Fraction of effective (signed) weight per grid point; points dominated
    # by negative weights are penalized in the likelihood loop below.
    n_eff_ratio = np.zeros((csarray.shape[0], csarray2.shape[0]))
    for i,cs in enumerate(csarray):
      ratiosList.append([])
      for j, cs2 in enumerate(csarray2):
        if weights_func is not None: 
          # Morphing weights supplied externally as a function of (g1,g2)
          c1s = weights_func(cs,cs2)
        else:
          c1s[:] = c1[:]
          c1s[c_eval] = cs
        if self.cross_section is not None:
          c1s = np.multiply(c1s,self.cross_section)
        n_eff = c1s.sum()
        n_tot = np.abs(c1s).sum()
        n_eff_ratio[i,j] = n_eff/n_tot 
        # Normalize the mixture weights to unit sum
        c1s = c1s/c1s.sum()
        decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x,
        plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_dist=pre_dist,
        pre_evaluation=pre_pdf)
        decomposedRatios = 1./decomposedRatios
        ratiosList[i].append(decomposedRatios)
    for i,cs in enumerate(csarray):
      for j, cs2 in enumerate(csarray2):
        decomposedRatios = ratiosList[i][j]
        if use_log == False:
          if samples_ids is not None:
            # Weight each event's log-ratio by the coefficient of the
            # sample it was drawn from
            ratios = decomposedRatios
            ids = samples_ids
            decomposedLikelihood[i,j] = (np.dot(np.log(ratios),
                np.array([c1[x] for x in ids]))).sum()
          else:
            # Clip negative ratios so the log is defined
            decomposedRatios[decomposedRatios < 0.] = 1.0
            if n_eff_ratio[i,j] <= 0.5:
              # TODO: Harcoded number — penalty for points dominated by
              # negative weights
              decomposedLikelihood[i,j] = 20000
            else:
              decomposedLikelihood[i,j] = -np.log(decomposedRatios).sum()
          # NOTE(review): trueRatios here is the leftover value from the
          # last iteration of the evaluation loop above — confirm intended
          trueLikelihood[i,j] = -np.log(trueRatios).sum()
        else:
          decomposedLikelihood[i,j] = decomposedRatios.sum()
          trueLikelihood[i,j] = trueRatios.sum()
    decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
    decMin = np.unravel_index(decomposedLikelihood.argmin(), decomposedLikelihood.shape)

    X,Y = np.meshgrid(csarray, csarray2)

    saveFig(X,[Y,decomposedLikelihood],makePlotName('comp','train',type='multilikelihood'),labels=['composed'],contour=True,marker=True,dir=self.dir,model_g=self.model_g,marker_value=(c1[0],c1[1]),print_pdf=True,min_value=(csarray[decMin[0]],csarray2[decMin[1]]))
    print([csarray[decMin[0]],csarray2[decMin[1]]])
    if true_dist == True:
      trueLikelihood = trueLikelihood - trueLikelihood.min()
      trueMin = np.unravel_index(trueLikelihood.argmin(), trueLikelihood.shape)
      saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood'),labels=['decomposed','true'],axis=['c1[0]','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[0],title='c1[0] Fitting',print_pdf=True)
      return [[csarray[trueMin[0]],csarray2[trueMin[1]]],
          [csarray[decMin[0]],csarray2[decMin[1]]]]
    else:
      return [[0.,0.],[csarray[decMin[0]],csarray2[decMin[1]]]]
# Ejemplo n.º 8
# 0
  def fit(self, data_file='test',importance_sampling=False, true_dist=True,vars_g=None):
    ''' 
      Create pdfs for the classifier 
      score to be used later on the ratio 
      test, input workspace only needed in case 
      there exist true pdfs for the distributions
      the models being used are ./model/{model_g}/{c1_g}/{model_file}_i_j.pkl
      and the data files are ./data/{model_g}/{c1_g}/{data_file}_i_j.dat
    '''

    bins = 40
    low = 0.
    high = 1.  
    
    if self.input_workspace <> None:
      #f = ROOT.TFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace))
      f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace))
     
      w = f.Get('w')
      # TODO test this when workspace is present
      w = ROOT.RooWorkspace('w') if w == None else w
      f.Close()
    else: 
      w = ROOT.RooWorkspace('w')
    w.Print()

    print 'Generating Score Histograms'

    w.factory('score[{0},{1}]'.format(low,high))
    s = w.var('score')
    
    if importance_sampling == True:
      if true_dist == True:
        vars = ROOT.TList()
        for var in vars_g:
          vars.Add(w.var(var))
        x = ROOT.RooArgSet(vars)
      else:
        x = None

    #This is because most of the data of the full model concentrate around 0 
    bins_full = 40
    low_full = 0.
    high_full = 1.
    w.factory('scoref[{0},{1}]'.format(low_full, high_full))
    s_full = w.var('scoref')
    histos = []
    histos_names = []
    inv_histos = []
    inv_histos_names = []
    sums_histos = []
    def saveHistos(w,outputs,s,bins,low,high,pos=None,importance_sampling=False,importance_data=None,
          importance_outputs=None):
      if pos <> None:
        k,j = pos
      else:
        k,j = ('F0','F1')
      print 'Estimating {0} {1}'.format(k,j)
      for l,name in enumerate(['sig','bkg']):
        data = ROOT.RooDataSet('{0}data_{1}_{2}'.format(name,k,j),"data",
            ROOT.RooArgSet(s))
        hist = ROOT.TH1F('{0}hist_{1}_{2}'.format(name,k,j),'hist',bins,low,high)
        values = outputs[l]
        #values = values[self.findOutliers(values)]
        for val in values:
          hist.Fill(val)
          s.setVal(val)
          data.add(ROOT.RooArgSet(s))
        norm = 1./hist.Integral()
        hist.Scale(norm) 
          
        s.setBins(bins)
        datahist = ROOT.RooDataHist('{0}datahist_{1}_{2}'.format(name,k,j),'hist',
              ROOT.RooArgList(s),hist)
        #histpdf = ROOT.RooHistPdf('{0}histpdf_{1}_{2}'.format(name,k,j),'hist',
        #      ROOT.RooArgSet(s), datahist, 1)
        histpdf = ROOT.RooHistFunc('{0}histpdf_{1}_{2}'.format(name,k,j),'hist',
              ROOT.RooArgSet(s), datahist, 1)
        #histpdf.setUnitNorm(True)
        #testvalues = np.array([self.evalDist(ROOT.RooArgSet(s), histpdf, [xs]) for xs in values])

        #histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator')

        #print 'INTEGRAL'
        #print histpdf.createIntegral(ROOT.RooArgSet(s)).getVal()
        #print histpdf.Integral()
      
        #histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooAdaptiveGaussKronrodIntegrator1D')

        getattr(w,'import')(hist)
        getattr(w,'import')(data)
        getattr(w,'import')(datahist) # work around for morph = w.import(morph)
        getattr(w,'import')(histpdf) # work around for morph = w.import(morph)
        score_str = 'scoref' if pos == None else 'score'
        # Calculate the density of the classifier output using kernel density 
        #w.factory('KeysPdf::{0}dist_{1}_{2}({3},{0}data_{1}_{2},RooKeysPdf::NoMirror,2)'.format(name,k,j,score_str))

        # Print histograms pdfs and estimated densities
        if self.verbose_printing == True and name == 'bkg' and k <> j:
          full = 'full' if pos == None else 'dec'
          if k < j and k <> 'F0':
            histos.append([w.function('sighistpdf_{0}_{1}'.format(k,j)), w.function('bkghistpdf_{0}_{1}'.format(k,j))])
            histos_names.append(['f{0}-f{1}_f{1}(signal)'.format(k,j), 'f{0}-f{1}_f{0}(background)'.format(k,j)])
          if j < k and k <> 'F0':
            inv_histos.append([w.function('sighistpdf_{0}_{1}'.format(k,j)), w.function('bkghistpdf_{0}_{1}'.format(k,j))])
            inv_histos_names.append(['f{0}-f{1}_f{1}(signal)'.format(k,j), 'f{0}-f{1}_f{0}(background)'.format(k,j)])

    if self.scaler == None:
      self.scaler = {}

    # change this
    for k in range(self.nsamples):
      for j in range(self.nsamples):
        if k == j:
          continue
        #if k <> 2 and j <> 2:
        #  continue
        if self.dataset_names <> None:
          name_k, name_j = (self.dataset_names[k], self.dataset_names[j])
        else:
          name_k, name_j = (k,j)
        print 'Loading {0}:{1} {2}:{3}'.format(k,name_k, j,name_j)
        traindata, targetdata = loadData(data_file,name_k,name_j,dir=self.dir,c1_g=self.c1_g,
            preprocessing=self.preprocessing,scaler=self.scaler,persist=True)
       
        numtrain = traindata.shape[0]       
        size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1
        #output = [predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,k,j),traindata[targetdata == 1],model_g=self.model_g),
        #  predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,k,j),traindata[targetdata == 0],model_g=self.model_g)]
        output = [predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata[targetdata==1],model_g=self.model_g,clf=self.clf),
              predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata[targetdata==0],model_g=self.model_g,clf=self.clf)]
        saveHistos(w,output,s,bins,low,high,(k,j))
        #w.writeToFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace))
        w.writeToFile('{0}/{1}'.format(self.dir,self.workspace))

    if self.verbose_printing==True:
      for ind in range(1,(len(histos)/3+1)):
        print_histos = histos[(ind-1)*3:(ind-1)*3+3]
        print_histos_names = histos_names[(ind-1)*3:(ind-1)*3+3]
        printMultiFrame(w,['score']*len(print_histos),print_histos, makePlotName('dec{0}'.format(ind-1),'all',type='hist',dir=self.dir,c1_g=self.c1_g,model_g=self.model_g),print_histos_names,
          dir=self.dir,model_g=self.model_g,y_text='score(x)',print_pdf=True,title='Pairwise score distributions')
    # Full model
    traindata, targetdata = loadData(data_file,self.F0_dist,self.F1_dist,dir=self.dir,c1_g=self.c1_g,
      preprocessing=self.preprocessing, scaler=self.scaler)
    numtrain = traindata.shape[0]       
    size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1
    outputs = [predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),traindata[targetdata==1],model_g=self.model_g,clf=self.clf),
              predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),traindata[targetdata==0],model_g=self.model_g,clf=self.clf)]
    #outputs = [predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),traindata[targetdata==1],model_g=self.model_g),
    #          predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),traindata[targetdata==0],model_g=self.model_g)]

    saveHistos(w,outputs,s_full, bins_full, low_full, high_full,importance_sampling=False)
    if self.verbose_printing == True:
      printFrame(w,['scoref'],[w.function('sighistpdf_F0_F1'),w.function('bkghistpdf_F0_F1')], makePlotName('full','all',type='hist',dir=self.dir,c1_g=self.c1_g,model_g=self.model_g),['signal','bkg'],
    dir=self.dir,model_g=self.model_g,y_text='score(x)',print_pdf=True,title='Pairwise score distributions')
   
    #w.writeToFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace))
    w.writeToFile('{0}/{1}'.format(self.dir,self.workspace))
    
    w.Print()
Ejemplo n.º 9
0
  def evalC1Likelihood(self,w,testdata,c0,c1,c_eval=0,c_min=0.01,c_max=0.2,use_log=False,true_dist=False, vars_g=None, npoints=50,samples_ids=None,weights_func=None,coef_index=0):
    '''
      Scan one coupling coefficient over [c_min, c_max] and evaluate the
      decomposed (and optionally the true) likelihood of testdata at each
      scan point.

      w            : RooWorkspace holding the pairwise score pdfs
                     (sighistpdf_k_j / bkghistpdf_k_j) and, when true_dist,
                     the true pdfs f{k}.
      testdata     : numpy array of evaluation samples.
      c0, c1       : coefficient vectors of the two mixture models.
      c_eval       : index of the scanned coefficient (used when no
                     weights_func is given).
      use_log      : use the log-ratio evaluator instead of the plain one.
      true_dist    : also evaluate the likelihood with the true pdfs
                     (requires vars_g, the workspace variable names).
      samples_ids  : optional per-event sample index; when given, the
                     log-ratios are weighted by the matching c1 entries.
      weights_func : optional function (g1, g2) -> coefficient vector;
                     coef_index selects which of its arguments is scanned.

      Returns (true_min, decomposed_min): the scan values that minimize
      each likelihood (true_min is 0. when true_dist is False).
    '''
    if true_dist == True:
      vars = ROOT.TList()
      for var in vars_g:
        vars.Add(w.var(var))
      x = ROOT.RooArgSet(vars)
    else:
      x = None

    score = ROOT.RooArgSet(w.var('score'))
    if use_log == True:
      evaluateRatio = self.evaluateLogDecomposedRatio
      post = 'log'
    else:
      evaluateRatio = self.evaluateDecomposedRatio
      post = ''

    csarray = np.linspace(c_min,c_max,npoints)
    decomposedLikelihood = np.zeros(npoints)
    trueLikelihood = np.zeros(npoints)
    c1s = np.zeros(c0.shape[0])
    pre_pdf = []
    pre_dist = []
    pre_pdf.extend([[],[]])
    pre_dist.extend([[],[]])
    # Precompute the pairwise classifier outputs and pdf evaluations once;
    # they are reused unchanged at every scan point.
    # BUGFIX: was 'for k in enumerate(self.nsamples)' (and likewise for j),
    # but self.nsamples is an int, so enumerate() raised TypeError; range()
    # is what was intended (cf. the identical loops elsewhere in the class).
    for k in range(self.nsamples):
      pre_pdf[0].append([])
      pre_pdf[1].append([])
      pre_dist[0].append([])
      pre_dist[1].append([])
      for j in range(self.nsamples):
        index_k,index_j = (self.basis_indexes[k],self.basis_indexes[j])
        if k != j:
          f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j))
          f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j))
          data = testdata
          if self.preprocessing == True:
            data = preProcessing(testdata,self.dataset_names[min(index_k,index_j)],
            self.dataset_names[max(index_k,index_j)],self.scaler)
          outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,
          self.c1_g,self.model_file,index_k,index_j),data,model_g=self.model_g, clf=self.clf)
          f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
          f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
          pre_pdf[0][k].append(f0pdfdist)
          pre_pdf[1][k].append(f1pdfdist)
        else:
          # diagonal pairs (k == j) carry no discriminating information
          pre_pdf[0][k].append(None)
          pre_pdf[1][k].append(None)
        if true_dist == True:
          f0 = w.pdf('f{0}'.format(index_k))
          f1 = w.pdf('f{0}'.format(index_j))
          if len(testdata.shape) > 1:
            f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata])
            f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata])
          else:
            f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata])
            f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata])
          pre_dist[0][k].append(f0dist)
          pre_dist[1][k].append(f1dist)
    ratiosList = []
    trueRatiosList = []
    n_eff_ratio = np.zeros(csarray.shape[0])
    n_zeros = np.zeros(csarray.shape[0])
    cross_section = None
    for i,cs in enumerate(csarray):
      if weights_func is not None:
        # morphing weights recomputed from the scanned coupling value
        c1s = weights_func(cs,c1[1]) if coef_index == 0 else weights_func(c1[0],cs)
        print('{0} {1}'.format(cs, c1[1]) if coef_index == 0 else '{0} {1}'.format(c1[0],cs))
        print(c1s)
      else:
        c1s[:] = c1[:]
        c1s[c_eval] = cs
      # BUGFIX: use identity check; self.cross_section may be a numpy array,
      # for which '<> None' compares element-wise.
      if self.cross_section is not None:
        c1s = np.multiply(c1s,self.cross_section)
      n_eff = c1s.sum()
      n_tot = np.abs(c1s).sum()
      print('n_eff: {0}, n_tot: {1}, n_eff/n_tot: {2}'.format(n_eff, n_tot, n_eff/n_tot))
      c1s = c1s/c1s.sum()
      decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x,
      plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_dist=pre_dist,
      pre_evaluation=pre_pdf,cross_section=cross_section)
      decomposedRatios = 1./decomposedRatios
      n_eff_ratio[i] = n_eff/n_tot
      n_zeros[i] = decomposedRatios[decomposedRatios < 0.].shape[0]
      print(decomposedRatios[decomposedRatios < 0.].shape)
      ratiosList.append(decomposedRatios)
      # BUGFIX: trueRatios previously leaked out of this loop, so every scan
      # point reused the ratios of the LAST scan point below; keep them
      # per scan point instead.
      trueRatiosList.append(trueRatios)
    for i,cs in enumerate(csarray):
      decomposedRatios = ratiosList[i]
      trueRatios = trueRatiosList[i]
      if use_log == False:
        if samples_ids is not None:
          ratios = decomposedRatios
          ids = samples_ids
          decomposedLikelihood[i] = (np.dot(np.log(ratios),
              np.array([c1[x] for x in ids]))).sum()
        else:
          # negative ratios are unphysical; clip to 1 so they contribute
          # zero to the log-likelihood instead of NaN
          decomposedRatios[decomposedRatios < 0.] = 1.0
          decomposedLikelihood[i] = -np.log(decomposedRatios).sum()
          print(decomposedLikelihood[i])
        trueLikelihood[i] = -np.log(trueRatios).sum()
      else:
        # log evaluator already returns log-ratios; just sum them
        decomposedLikelihood[i] = decomposedRatios.sum()
        trueLikelihood[i] = trueRatios.sum()
    # shift so the minimum is at zero (likelihood-ratio style curves)
    decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
    if true_dist == True:
      trueLikelihood = trueLikelihood - trueLikelihood.min()
      # BUGFIX: plot name used undefined 'n_sample' (NameError); tag the plot
      # with the scanned coefficient index instead.
      saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood_{0}'.format(c_eval)),labels=['decomposed','true'],axis=['c1[0]','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[0],title='c1[0] Fitting',print_pdf=True)
      return (csarray[trueLikelihood.argmin()], csarray[decomposedLikelihood.argmin()])
    else:
      saveFig(csarray,[decomposedLikelihood],makePlotName('comp','train',type='likelihood_g2'),labels=['decomposed'],axis=['g2','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[c_eval],title='g2 Fitting',print_pdf=True,model_g=self.model_g)
      # BUGFIX: removed leftover pdb.set_trace() debugging breakpoint.
      return (0.,csarray[decomposedLikelihood.argmin()])
Ejemplo n.º 10
0
  def computeRatios(self,true_dist=False, vars_g=None,
      data_file='test',use_log=False):
    '''
      Use the computed score densities to compute
      the decomposed ratio test.
      Set true_dist to True if the workspace holds the true distributions
      so comparison plots can be made; in that case vars_g (the workspace
      variable names) must also be provided.
      Final results are histograms of the composed/full/true ratios and
      signal - bkg rejection curves.

      NOTE(review): when true_dist is False, vars_g may be None, but
      len(vars_g) below would then raise TypeError — presumably callers
      always pass vars_g; confirm.
    '''

    # Reload the workspace with the score pdfs built earlier.
    f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace))
    w = f.Get('w')
    f.Close()

    
    # TODO: these are hard-coded for now
    c1 = self.c1
    c0 = self.c0
    #c1 = np.multiply(c1, self.cross_section)
    # normalize both coefficient vectors to unit sum
    c1 = c1/c1.sum()
    c0 = c0/c0.sum()

    print 'Calculating ratios'

    npoints = 50

    if true_dist == True:
      # collect the observables into a RooArgSet for true-pdf evaluation
      vars = ROOT.TList()
      for var in vars_g:
        vars.Add(w.var(var))
      x = ROOT.RooArgSet(vars)

    if use_log == True:
      evaluateRatio = self.evaluateLogDecomposedRatio
      post = 'log'
    else:
      evaluateRatio = self.evaluateDecomposedRatio
      post = ''

    score = ROOT.RooArgSet(w.var('score'))
    scoref = ROOT.RooArgSet(w.var('scoref'))

    if use_log == True:
      getRatio = self.singleLogRatio
    else:
      getRatio = self.singleRatio
   
    # Lazily load the per-pair scalers saved during training.
    # NOTE(review): '== None' works here (dict or None) but 'is None' is
    # the safer idiom.
    if self.preprocessing == True:
      if self.scaler == None:
        self.scaler = {}
        for k in range(self.nsamples):
         for j in range(self.nsamples):
           if k < j:
            self.scaler[(k,j)] = joblib.load('{0}/model/{1}/{2}/{3}_{4}_{5}.dat'.format(self.dir,'mlp',self.c1_g,'scaler',self.dataset_names[k],self.dataset_names[j]))
            

    # NN trained on complete model
    F0pdf = w.function('bkghistpdf_F0_F1')
    F1pdf = w.function('sighistpdf_F0_F1')

    # TODO Here assuming that signal is first dataset  
    testdata, testtarget = loadData(data_file,self.F0_dist,0,dir=self.dir,c1_g=self.c1_g,preprocessing=False) 
    # 1-D case: also plot the ratio curves over a fixed grid [0, 5].
    # NOTE(review): 'x' is only defined when true_dist is True; this branch
    # presumably is only reached in that configuration — confirm.
    if len(vars_g) == 1:
      xarray = np.linspace(0,5,npoints)
      fullRatios,_ = evaluateRatio(w,xarray,x=x,plotting=True,roc=False,true_dist=True)

      F1dist = np.array([self.evalDist(x,w.pdf('F1'),[xs]) for xs in xarray])
      F0dist = np.array([self.evalDist(x,w.pdf('F0'),[xs]) for xs in xarray])
      y2 = getRatio(F1dist, F0dist)

      # NN trained on complete model
      outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),xarray.reshape(xarray.shape[0],1),model_g=self.model_g,clf=self.clf)
      F1fulldist = np.array([self.evalDist(scoref,F1pdf,[xs]) for xs in outputs])
      F0fulldist = np.array([self.evalDist(scoref,F0pdf,[xs]) for xs in outputs])

      pdfratios = getRatio(F1fulldist, F0fulldist)

      saveFig(xarray, [fullRatios, y2, pdfratios], makePlotName('all','train',type='ratio'+post),title='Likelihood Ratios',labels=['Composed trained', 'True', 'Full trained'],print_pdf=True,dir=self.dir)
      
    # Composed ratio from the pairwise decomposition.
    if true_dist == True:
      decomposedRatio,_ = evaluateRatio(w,testdata,x=x,plotting=False,roc=self.verbose_printing,true_dist=True)
    else:
      decomposedRatio,_ = evaluateRatio(w,testdata,c0arr=c0,c1arr=c1,plotting=True,
      roc=True,data_type=data_file)
    # Ratio from the single classifier trained on the complete model.
    if len(testdata.shape) > 1:
      outputs = predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),testdata,model_g=self.model_g,clf=self.clf)
      #outputs = predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),testdata,model_g=self.model_g)

    else:
      outputs = predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),testdata.reshape(testdata.shape[0],1),model_g=self.model_g,clf=self.clf)

    F1fulldist = np.array([self.evalDist(scoref,F1pdf,[xs]) for xs in outputs])
    F0fulldist = np.array([self.evalDist(scoref,F0pdf,[xs]) for xs in outputs])

    completeRatio = getRatio(F1fulldist,F0fulldist)
    # Ratio from the true pdfs, when available.
    if true_dist == True:
      if len(testdata.shape) > 1:
        F1dist = np.array([self.evalDist(x,w.pdf('F1'),xs) for xs in testdata])
        F0dist = np.array([self.evalDist(x,w.pdf('F0'),xs) for xs in testdata])
      else:
        F1dist = np.array([self.evalDist(x,w.pdf('F1'),[xs]) for xs in testdata])
        F0dist = np.array([self.evalDist(x,w.pdf('F0'),[xs]) for xs in testdata])

      realRatio = getRatio(F1dist,F0dist)

    decomposed_target = testtarget
    complete_target = testtarget
    real_target = testtarget
    #Histogram F0-f0 for composed, full and true

    # Removing outliers
    numtest = decomposedRatio.shape[0] 
    #decomposedRatio[decomposedRatio < 0.] = completeRatio[decomposedRatio < 0.]

    #decomposed_outliers = np.zeros(numtest,dtype=bool)
    #complete_outliers = np.zeros(numtest,dtype=bool)
    #decomposed_outliers = self.findOutliers(decomposedRatio)
    #complete_outliers = self.findOutliers(completeRatio)
    #decomposed_target = testtarget[decomposed_outliers] 
    #complete_target = testtarget[complete_outliers] 
    #decomposedRatio = decomposedRatio[decomposed_outliers]
    #completeRatio = completeRatio[complete_outliers]
    if true_dist == True:
      # NOTE(review): real_outliers is computed but the filtering below is
      # commented out, so it currently has no effect.
      real_outliers = np.zeros(numtest,dtype=bool)
      real_outliers = self.findOutliers(realRatio)
      #real_target = testtarget[real_outliers] 
      #realRatio = realRatio[real_outliers]

    all_ratios_plots = []
    all_names_plots = []
    bins = 70
    # NOTE(review): the scalar low/high below (including the use_log branch)
    # are immediately discarded — low/high are rebound to lists and filled
    # per class from the data further down. Dead code kept as-is; confirm
    # before cleaning up.
    low = 0.6
    high = 1.2
    if use_log == True:
      low = -1.0
      high = 1.0
    low = []
    high = []
    low = []
    high = []
    ratios_vars = []
    # Build one RooRealVar per class ('sig'/'bkg') with a data-driven range
    # padded by 10 bin-widths on each side.
    for l,name in enumerate(['sig','bkg']):
      if true_dist == True:
        ratios_names = ['truth','full','composed']
        ratios_vec = [realRatio, completeRatio, decomposedRatio]
        target_vec = [real_target, complete_target, decomposed_target] 

        minimum = min([realRatio[real_target == 1-l].min(), 
              completeRatio[complete_target == 1-l].min(), 
              decomposedRatio[decomposed_target == 1-l].min()])
        maximum = max([realRatio[real_target == 1-l].max(), 
              completeRatio[complete_target == 1-l].max(), 
              decomposedRatio[decomposed_target == 1-l].max()])

      else:
        ratios_names = ['full','composed']
        ratios_vec = [completeRatio, decomposedRatio]
        target_vec = [complete_target, decomposed_target] 
        minimum = min([completeRatio[complete_target == 1-l].min(), 
              decomposedRatio[decomposed_target == 1-l].min()])
        maximum = max([completeRatio[complete_target == 1-l].max(), 
              decomposedRatio[decomposed_target == 1-l].max()])

      low.append(minimum - ((maximum - minimum) / bins)*10)
      high.append(maximum + ((maximum - minimum) / bins)*10)
      w.factory('ratio{0}[{1},{2}]'.format(name, low[l], high[l]))
      ratios_vars.append(w.var('ratio{0}'.format(name)))
    # Fill a histogram pdf per (estimator, class) pair and import it into
    # the workspace for plotting.
    for curr, curr_ratios, curr_targets in zip(ratios_names,ratios_vec,target_vec):
      numtest = curr_ratios.shape[0] 
      for l,name in enumerate(['sig','bkg']):
        hist = ROOT.TH1F('{0}_{1}hist_F0_f0'.format(curr,name),'hist',bins,low[l],high[l])
        for val in curr_ratios[curr_targets == 1-l]:
          hist.Fill(val)
        datahist = ROOT.RooDataHist('{0}_{1}datahist_F0_f0'.format(curr,name),'hist',
              ROOT.RooArgList(ratios_vars[l]),hist)
        ratios_vars[l].setBins(bins)
        histpdf = ROOT.RooHistFunc('{0}_{1}histpdf_F0_f0'.format(curr,name),'hist',
              ROOT.RooArgSet(ratios_vars[l]), datahist, 0)

        histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator')
        getattr(w,'import')(hist)
        getattr(w,'import')(datahist) # work around for morph = w.import(morph)
        getattr(w,'import')(histpdf) # work around for morph = w.import(morph)
        #print '{0} {1} {2}'.format(curr,name,hist.Integral())
        if name == 'bkg':
          all_ratios_plots.append([w.function('{0}_sighistpdf_F0_f0'.format(curr)),
                w.function('{0}_bkghistpdf_F0_f0'.format(curr))])
          all_names_plots.append(['sig_{0}'.format(curr),'bkg_{0}'.format(curr)])
        
    # transpose [estimator][class] -> [class][estimator] for plotting
    all_ratios_plots = [[all_ratios_plots[j][i] for j,_ in enumerate(all_ratios_plots)] 
                for i,_ in enumerate(all_ratios_plots[0])]
    all_names_plots = [[all_names_plots[j][i] for j,_ in enumerate(all_names_plots)] 
                for i,_ in enumerate(all_names_plots[0])]

    printMultiFrame(w,['ratiosig','ratiobkg'],all_ratios_plots, makePlotName('ratio','comparison',type='hist'+post,dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),all_names_plots,setLog=True,dir=self.dir,model_g=self.model_g,y_text='Count',title='Histograms for ratios',x_text='ratio value',print_pdf=True)

    # scatter plot true ratio - composed - full ratio

    #if self.verbose_printing == True and true_dist == True:
    #  saveFig(completeRatio,[realRatio], makePlotName('full','train',type='scat'+post,dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),scatter=True,axis=['full trained ratio','true ratio'],dir=self.dir,model_g=self.model_g)
    #  saveFig(decomposedRatio,[realRatio], makePlotName('comp','train',type='scat'+post,dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['composed trained ratio','true ratio'],dir=self.dir, model_g=self.model_g)
    # signal - bkg rejection plots
    # log-ratios are exponentiated back before building rejection curves
    if use_log == True:
      decomposedRatio = np.exp(decomposedRatio)
      completeRatio = np.exp(completeRatio)
      if true_dist == True:
        realRatio = np.exp(realRatio)
    if true_dist == True:

      ratios_list = [decomposedRatio/decomposedRatio.max(), 
                    completeRatio/completeRatio.max(),
                    realRatio/realRatio.max()]
      targets_list = [decomposed_target, complete_target, real_target]
      legends_list = ['composed', 'full', 'true']
    else:

      # drop non-positive ratios, move to log space and shift to >= 0
      # before normalizing for the rejection curves
      indices = (decomposedRatio > 0.)
      decomposedRatio = decomposedRatio[indices] 
      decomposed_target = decomposed_target[indices]
      indices = (completeRatio > 0.)
      completeRatio = completeRatio[indices]
      complete_target = complete_target[indices]

      completeRatio = np.log(completeRatio)
      decomposedRatio = np.log(decomposedRatio)
      decomposedRatio = decomposedRatio + np.abs(decomposedRatio.min())
      completeRatio = completeRatio + np.abs(completeRatio.min())
      ratios_list = [decomposedRatio/decomposedRatio.max(), 
                    completeRatio/completeRatio.max()]
      targets_list = [decomposed_target, complete_target]
      legends_list = ['composed','full']
    makeSigBkg(ratios_list,targets_list,makePlotName('comp','all',type='sigbkg'+post,dir=self.dir,
          model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,print_pdf=True,legends=legends_list,title='Signal-Background rejection curves')

    # Scatter plot to compare regression function and classifier score
    if self.verbose_printing == True and true_dist == True:
      testdata, testtarget = loadData('test',self.F0_dist,self.F1_dist,dir=self.dir,c1_g=self.c1_g) 
      if len(testdata.shape) > 1:
        reg = np.array([self.__regFunc(x,w.pdf('F0'),w.pdf('F1'),xs) for xs in testdata])
      else:
        reg = np.array([self.__regFunc(x,w.pdf('F0'),w.pdf('F1'),[xs]) for xs in testdata])
      if len(testdata.shape) > 1:
        outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),testdata.reshape(testdata.shape[0],testdata.shape[1]),model_g=self.model_g, clf=self.clf)
      else:
        outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),testdata.reshape(testdata.shape[0],1),model_g=self.model_g, clf=self.clf)
Ejemplo n.º 11
0
  def evaluateDecomposedRatio(self,w,evalData,x=None,plotting=True, roc=False,gridsize=None,c0arr=None, c1arr=None,true_dist=False,pre_evaluation=None,pre_dist=None,data_type='test',debug=False,cross_section=None,indexes=None):
    '''
      Compute the composed likelihood ratio for dataset 'evalData' from the
      pairwise classifier score pdfs stored in workspace w.

      Pairwise evaluations can be precomputed and passed in pre_evaluation
      (classifier score pdfs) and pre_dist (true pdfs); otherwise they are
      evaluated here. When roc/plotting is set, pairwise ROC curves and
      ratio figures are also produced.

      Returns (fullRatios, fullRatiosReal); fullRatiosReal stays zero
      unless true_dist is True.
    '''

    if indexes is None:
      indexes = self.basis_indexes

    score = ROOT.RooArgSet(w.var('score'))
    npoints = evalData.shape[0]
    fullRatios = np.zeros(npoints)
    fullRatiosReal = np.zeros(npoints)
    # BUGFIX: identity checks instead of '== None' — comparing a numpy
    # array with '==' yields an element-wise array whose truth value raises.
    c0arr = self.c0 if c0arr is None else c0arr
    c1arr = self.c1 if c1arr is None else c1arr

    true_score = []
    train_score = []
    all_targets = []
    all_positions = []
    all_ratios = []
    for k,c in enumerate(c0arr):
      innerRatios = np.zeros(npoints)
      innerTrueRatios = np.zeros(npoints)
      # components with zero coefficient do not contribute
      if c == 0:
        continue
      for j,c_ in enumerate(c1arr):
        index_k, index_j = (indexes[k],indexes[j])
        f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j))
        f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j))
        if index_k != index_j:
          if pre_evaluation is None:
            traindata = evalData
            if self.preprocessing == True:
              traindata = preProcessing(evalData,self.dataset_names[min(index_k,index_j)],
              self.dataset_names[max(index_k,index_j)],self.scaler) 
            outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata,model_g=self.model_g,clf=self.clf)
            f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
            f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
          else:
            f0pdfdist = pre_evaluation[0][index_k][index_j]
            f1pdfdist = pre_evaluation[1][index_k][index_j]
            # BUGFIX: was 'f0pdfdist == None or ...' followed by
            # pdb.set_trace(); fail loudly instead of element-wise
            # comparison plus an interactive breakpoint.
            if f0pdfdist is None or f1pdfdist is None:
              raise ValueError('Missing pre-computed pdf evaluation for pair ({0},{1})'.format(index_k,index_j))
          pdfratios = self.singleRatio(f0pdfdist,f1pdfdist)
        else:
          # identical distributions: the pairwise ratio is exactly one
          pdfratios = np.ones(npoints) 
        all_ratios.append(pdfratios)
        innerRatios += (c_/c) * pdfratios
        if true_dist == True:
          if pre_dist is None:
            f0 = w.pdf('f{0}'.format(index_k))
            f1 = w.pdf('f{0}'.format(index_j))
            if len(evalData.shape) > 1:
              f0dist = np.array([self.evalDist(x,f0,xs) for xs in evalData])
              f1dist = np.array([self.evalDist(x,f1,xs) for xs in evalData])
            else:
              f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in evalData])
              f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in evalData])
          else:
            f0dist = pre_dist[0][index_k][index_j]
            f1dist = pre_dist[1][index_k][index_j]
          ratios = self.singleRatio(f0dist, f1dist)
          innerTrueRatios += (c_/c) * ratios
        # ROC curves for pair-wise ratios
        if (roc == True or plotting==True) and k < j:
          all_positions.append((k,j))
          if roc == True:
            if self.dataset_names is not None:
              name_k, name_j = (self.dataset_names[index_k], self.dataset_names[index_j])
            else:
              name_k, name_j = (index_k,index_j)
            testdata, testtarget = loadData(data_type,name_k,name_j,dir=self.dir,c1_g=self.c1_g,
                  preprocessing=self.preprocessing, scaler=self.scaler) 
          else:
            testdata = evalData
          outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),testdata,model_g=self.model_g,clf=self.clf)
          f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
          f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
          clfRatios = self.singleRatio(f0pdfdist,f1pdfdist)
          train_score.append(clfRatios)
          if roc == True:
            all_targets.append(testtarget)
          if true_dist == True:
            # NOTE(review): f0/f1 here come from the 'pre_dist is None'
            # branch above; with pre_dist supplied this path would raise
            # NameError — presumably never exercised together, confirm.
            if len(evalData.shape) > 1:
              f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata])
              f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata])
            else:
              f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata])
              f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata])

            trRatios = self.singleRatio(f0dist,f1dist)

            true_score.append(trRatios)

      # invert the inner sum and accumulate; infinities coming from zero
      # denominators are zeroed out
      innerRatios = 1./innerRatios
      innerRatios[np.abs(innerRatios) == np.inf] = 0.
      fullRatios += innerRatios
      if true_dist == True:
        innerTrueRatios = 1./innerTrueRatios
        innerTrueRatios[np.abs(innerTrueRatios) == np.inf] = 0.
        fullRatiosReal += innerTrueRatios
    if roc == True:
      # plot ROC curves, three pairwise classifiers per canvas
      # ('//' keeps integer division correct under Python 3 as well)
      for ind in range(1,(len(train_score)//3+1)):
        print_scores = train_score[(ind-1)*3:(ind-1)*3+3]
        print_targets = all_targets[(ind-1)*3:(ind-1)*3+3]
        print_positions = all_positions[(ind-1)*3:(ind-1)*3+3]
        if true_dist == True:
          makeMultiROC(print_scores, print_targets,makePlotName('all{0}'.format(ind-1),'comparison',type='roc',
          dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,
          true_score = true_score,print_pdf=True,title='ROC for pairwise trained classifier',pos=print_positions)
        else:
          makeMultiROC(print_scores, print_targets,makePlotName('all{0}'.format(ind-1),'comparison',type='roc',
          dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,
          print_pdf=True,title='ROC for pairwise trained classifier',pos=print_positions)

    if plotting == True:
      saveMultiFig(evalData,[x for x in zip(train_score,true_score)],
      makePlotName('all_dec','train',type='ratio'),labels=[['f0-f1(trained)','f0-f1(truth)'],['f0-f2(trained)','f0-f2(truth)'],['f1-f2(trained)','f1-f2(truth)']],title='Pairwise Ratios',print_pdf=True,dir=self.dir)

    return fullRatios,fullRatiosReal
def evalC1Likelihood(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
            workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
            c1_g='',model_g='mlp',use_log=False,true_dist=False, vars_g=None):
  '''
  Scan the first signal weight c1[0] over a fixed grid and return the value
  that minimizes the likelihood of the decomposed ratios on the test data.

  Parameters:
    test       : object exposing singleRatio and evaluate(Log)DecomposedRatio
    c0, c1     : arrays of background/signal mixture coefficients
    dir        : working directory holding the workspace and data files
    workspace  : ROOT workspace file name
    c1_g, model_g : tags used to locate the trained models on disk
    use_log    : if True, ratios are already logs (sum instead of log-sum)
    true_dist  : if True, also evaluate the likelihood with the true pdfs
    vars_g     : observable names (required when true_dist is True)

  Returns:
    (true_min, decomposed_min) best-fit values of c1[0]; true_min is 0.
    when true_dist is False.
  '''

  f = ROOT.TFile('{0}/{1}'.format(dir,workspace))
  w = f.Get('w')
  f.Close()

  if true_dist == True:
    # Observables needed to evaluate the true pdfs
    vars = ROOT.TList()
    for var in vars_g:
      vars.Add(w.var(var))
    x = ROOT.RooArgSet(vars)
  else:
    x = None

  score = ROOT.RooArgSet(w.var('score'))
  if use_log == True:
    evaluateRatio = test.evaluateLogDecomposedRatio
    post = 'log'
  else:
    evaluateRatio = test.evaluateDecomposedRatio
    post = ''

  npoints = 25
  csarray = np.linspace(0.01,0.10,npoints)
  testdata = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,'mlp',c1_g,'test','F1'))
  decomposedLikelihood = np.zeros(npoints)
  trueLikelihood = np.zeros(npoints)
  c1s = np.zeros(c1.shape[0])
  # Pre-compute all pairwise ratios once: they depend only on the pair
  # (k, j), not on the scanned coefficient, so this avoids re-evaluating
  # the classifiers/pdfs at every grid point.
  pre_pdfratios = []
  pre_ratios = []
  for k,c0_ in enumerate(c0):
    pre_pdfratios.append([])
    pre_ratios.append([])
    for j,c1_ in enumerate(c1):
      if k != j:  # FIX: was Python-2-only '<>'
        f0pdf = w.pdf('bkghistpdf_{0}_{1}'.format(k,j))
        f1pdf = w.pdf('sighistpdf_{0}_{1}'.format(k,j))
        outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g,
        'adaptive',k,j),testdata,model_g=model_g)
        pdfratios = np.array([test.singleRatio(score,f0pdf,f1pdf,[xs]) for xs in outputs])
        pre_pdfratios[k].append(pdfratios)
      else:
        # Diagonal pairs have no trained classifier
        pre_pdfratios[k].append(None)
      if true_dist == True:
        f0 = w.pdf('f{0}'.format(k))
        f1 = w.pdf('f{0}'.format(j))
        if len(testdata.shape) > 1:
          ratios = np.array([test.singleRatio(x,f0,f1,xs) for xs in testdata])
        else:
          ratios = np.array([test.singleRatio(x,f0,f1,[xs]) for xs in testdata])
        pre_ratios[k].append(ratios)
  for i,cs in enumerate(csarray):
    # Substitute the scanned value and renormalize the mixture weights
    c1s[:] = c1[:]
    c1s[0] = cs
    c1s = c1s/c1s.sum()
    decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x,
    plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_ratios=pre_ratios,
    pre_pdfratios=pre_pdfratios)
    if use_log == False:
      decomposedLikelihood[i] = np.log(decomposedRatios).sum()
      # FIX: only evaluate the true likelihood when it was actually
      # computed; otherwise trueRatios is all zeros and log() floods
      # the run with -inf warnings (the value was unused anyway).
      if true_dist == True:
        trueLikelihood[i] = np.log(trueRatios).sum()
    else:
      decomposedLikelihood[i] = decomposedRatios.sum()
      trueLikelihood[i] = trueRatios.sum()

  decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
  if true_dist == True:
    trueLikelihood = trueLikelihood - trueLikelihood.min()
    saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood'),labels=['decomposed','true'],axis=['c1[0]','-ln(L)'],marker=True,dir=dir,
        marker_value=c1[0],title='c1[0] Fitting',print_pdf=True)
    return (csarray[trueLikelihood.argmin()], csarray[decomposedLikelihood.argmin()])
  else:
    return (0.,csarray[decomposedLikelihood.argmin()])
    def evalDoubleC1C2Likelihood(
            self,
            w,
            testdata,
            c0,
            c1,
            c_eval=0,
            c_min=0.01,
            c_max=0.2,
            use_log=False,
            true_dist=False,
            vars_g=None,
            npoints=50,
            samples_ids=None,
            weights_func=None):
        '''
          Find minimum of likelihood on testdata using decomposed
          ratios and the weighted orthogonal morphing method to find the bases.

          Scans an npoints x npoints grid over the couplings (g1, g2) in
          [c_min[0], c_max[0]] x [c_min[1], c_max[1]], evaluating the
          decomposed likelihood at each grid point with two pre-computed
          morphing bases, weighted by their effective statistics.

          NOTE(review): c_min/c_max are indexed as 2-element sequences
          below, so the scalar defaults (0.01, 0.2) cannot actually be
          used — callers must pass pairs. TODO confirm and fix defaults.
          c_eval, samples_ids and weights_func are accepted for interface
          compatibility but unused in this method.

          Returns [[true_g1, true_g2], [dec_g1, dec_g2]]; the first pair
          is [0., 0.] when true_dist is False.
        '''

        if true_dist:
            # Observables needed to evaluate the true pdfs
            vars = ROOT.TList()
            for var in vars_g:
                vars.Add(w.var(var))
            x = ROOT.RooArgSet(vars)
        else:
            x = None

        score = ROOT.RooArgSet(w.var('score'))
        if use_log:
            evaluateRatio = self.evaluateLogDecomposedRatio
            post = 'log'
        else:
            evaluateRatio = self.evaluateDecomposedRatio
            post = ''

        # Files produced by pre2DDoubleBasis for this coupling range
        basis_suffix = '{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(
            c_min[0], c_min[1], c_max[0], c_max[1], npoints)
        # Compute bases if they don't exist for this range
        if not os.path.isfile('3doubleindexes_' + basis_suffix):
            self.pre2DDoubleBasis(c_min=c_min, c_max=c_max, npoints=npoints)

        csarray = np.linspace(c_min[0], c_max[0], npoints)
        csarray2 = np.linspace(c_min[1], c_max[1], npoints)
        decomposedLikelihood = np.zeros((npoints, npoints))
        trueLikelihood = np.zeros((npoints, npoints))

        all_indexes = np.loadtxt('3doubleindexes_' + basis_suffix)
        # FIX: the comprehension variable here used to be named 'x'; in
        # Python 2 list-comprehension variables leak into the enclosing
        # scope, which clobbered the RooArgSet built above before it was
        # used by the true-distribution evaluations below.
        all_indexes = np.array([[int(v) for v in rows]
                                for rows in all_indexes])
        all_couplings = np.loadtxt('3doublecouplings_' + basis_suffix)
        all_cross_sections = np.loadtxt('3doublecrosssection_' + basis_suffix)

        # Bkg used in the fit
        # TODO: Harcoded this have to be changed
        basis_value = 1

        # Pre evaluate the classifier pdf values (and optionally the true
        # pdf values) for every needed sample pair, so the grid scan below
        # does not recompute them at each (g1, g2) point.
        # FIX: use real lists instead of range() placeholders so the
        # index assignments below also work under Python 3.
        pre_pdf = [[list(range(self.nsamples)) for _ in range(self.nsamples)],
                   [list(range(self.nsamples)) for _ in range(self.nsamples)]]
        pre_dist = [[list(range(self.nsamples)) for _ in range(self.nsamples)],
                    [list(range(self.nsamples)) for _ in range(self.nsamples)]]
        # Only precompute distributions that will be used
        unique_indexes = set()
        for indexes in all_indexes:
            unique_indexes |= set(indexes)
        unique_indexes = list(unique_indexes)
        for k in range(len(unique_indexes)):
            for j in range(len(unique_indexes)):
                index_k, index_j = (unique_indexes[k], unique_indexes[j])
                # This save some time by only evaluating the needed samples
                if index_k != basis_value:
                    continue
                print('Pre computing {0} {1}'.format(index_k, index_j))
                if k != j:
                    f0pdf = w.function(
                        'bkghistpdf_{0}_{1}'.format(index_k, index_j))
                    f1pdf = w.function(
                        'sighistpdf_{0}_{1}'.format(index_k, index_j))
                    data = testdata
                    if self.preprocessing:
                        # NOTE(review): picks dataset names by list
                        # position (k, j), not sample index — confirm.
                        data = preProcessing(testdata, self.dataset_names[min(
                            k, j)], self.dataset_names[max(k, j)], self.scaler)
                    outputs = predict(
                        '/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(
                            self.model_file, index_k, index_j), data, model_g=self.model_g)
                    pre_pdf[0][index_k][index_j] = np.array(
                        [self.evalDist(score, f0pdf, [xs]) for xs in outputs])
                    pre_pdf[1][index_k][index_j] = np.array(
                        [self.evalDist(score, f1pdf, [xs]) for xs in outputs])
                else:
                    # No trained classifier for diagonal pairs
                    pre_pdf[0][index_k][index_j] = None
                    pre_pdf[1][index_k][index_j] = None
                if true_dist:
                    f0 = w.pdf('f{0}'.format(index_k))
                    f1 = w.pdf('f{0}'.format(index_j))
                    if len(testdata.shape) > 1:
                        f0dist = np.array([self.evalDist(x, f0, xs)
                                           for xs in testdata])
                        f1dist = np.array([self.evalDist(x, f1, xs)
                                           for xs in testdata])
                    else:
                        f0dist = np.array([self.evalDist(x, f0, [xs])
                                           for xs in testdata])
                        f1dist = np.array([self.evalDist(x, f1, [xs])
                                           for xs in testdata])
                    pre_dist[0][index_k][index_j] = f0dist
                    pre_dist[1][index_k][index_j] = f1dist

        # Events kept in the final likelihood: ratio > 0 at every grid
        # point that has sufficient effective statistics
        indices = np.ones(testdata.shape[0], dtype=bool)
        ratiosList = []
        # Usefull values to inspect after the training
        alpha = np.zeros([csarray.shape[0], csarray2.shape[0], 2])
        n_eff_ratio = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_eff_1s = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_eff_2s = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_tot_1s = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_tot_2s = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_zeros = np.zeros((npoints, npoints))
        target = self.F1_couplings[:]

        def compute_one_alpha_part(weights, xs):
            # sum_i weights_i^2 * xs_i
            c1s_1 = np.multiply(weights, xs)
            c1s_1 = np.multiply(weights, c1s_1)
            alpha1 = c1s_1.sum()
            return alpha1

        exp_basis_weights = True
        for i, cs in enumerate(csarray):
            ratiosList.append([])
            for j, cs2 in enumerate(csarray2):
                target[1] = cs
                target[2] = cs2
                print('{0} {1}'.format(i, j))
                print(target)

                # Compute F1 couplings and cross sections for basis 1
                c1s_1 = all_couplings[i * npoints + j]
                cross_section_1 = all_cross_sections[i * npoints + j]
                c1s_1 = np.multiply(c1s_1, cross_section_1)
                n_eff = c1s_1.sum()
                n_tot = np.abs(c1s_1).sum()
                n_eff_1 = n_eff / n_tot
                n_eff_1s[i, j] = n_eff_1
                n_tot_1s[i, j] = n_tot
                print('n_eff 1: {0}'.format(n_eff / n_tot))
                c1s_1 = c1s_1 / c1s_1.sum()

                # ... and for basis 2 (stored npoints*npoints rows later)
                c1s_2 = all_couplings[npoints * npoints + i * npoints + j]
                cross_section_2 = all_cross_sections[
                    npoints * npoints + i * npoints + j]
                c1s_2 = np.multiply(c1s_2, cross_section_2)
                n_eff = c1s_2.sum()
                n_tot = np.abs(c1s_2).sum()
                n_eff_2 = n_eff / n_tot
                n_eff_2s[i, j] = n_eff_2
                n_tot_2s[i, j] = n_tot
                print('n_eff 2: {0}'.format(n_eff / n_tot))
                c1s_2 = c1s_2 / c1s_2.sum()

                # Basis weights: exponential in the (cube root of the)
                # inverse effective statistics, or quadratic alpha parts
                if exp_basis_weights:
                    neff2 = 1. / n_eff_2
                    neff1 = 1. / n_eff_1
                    alpha1 = np.exp(-neff1**(1. / 3.))
                    alpha2 = np.exp(-neff2**(1. / 3.))
                    alpha[i, j, 0] = alpha1 / (alpha1 + alpha2)
                    alpha[i, j, 1] = alpha2 / (alpha1 + alpha2)
                else:
                    alpha1 = compute_one_alpha_part(
                        all_couplings[i * npoints + j],
                        all_cross_sections[i * npoints + j])
                    alpha2 = compute_one_alpha_part(
                        all_couplings[npoints * npoints + i * npoints + j],
                        all_cross_sections[npoints * npoints + i * npoints + j])
                    alpha[i, j, 0] = (1 / 2.) * (alpha2 / (alpha1 + alpha2))
                    alpha[i, j, 1] = (1 / 2.) * (alpha1 / (alpha1 + alpha2))

                # Compute Bkg weights
                c0_arr_1 = np.zeros(15)
                c0_arr_2 = np.zeros(15)
                c0_arr_1[np.where(all_indexes[0] == basis_value)[0][0]] = 1.
                c0_arr_2[np.where(all_indexes[1] == basis_value)[0][0]] = 1.

                c0_arr_1 = c0_arr_1 / c0_arr_1.sum()
                c0_arr_2 = c0_arr_2 / c0_arr_2.sum()

                c1s = np.append(alpha[i, j, 0] * c1s_1, alpha[i, j, 1] * c1s_2)
                c0_arr = np.append(0.5 * c0_arr_1, 0.5 * c0_arr_2)

                print(c0_arr)

                cross_section = np.append(cross_section_1, cross_section_2)
                indexes = np.append(all_indexes[0], all_indexes[1])
                completeRatios, trueRatios = evaluateRatio(
                    w, testdata, x=x, plotting=False, roc=False,
                    c0arr=c0_arr, c1arr=c1s, true_dist=true_dist,
                    pre_dist=pre_dist, pre_evaluation=pre_pdf,
                    cross_section=cross_section, indexes=indexes)
                completeRatios = 1. / completeRatios

                print(completeRatios[completeRatios < 0.].shape)
                n_zeros[i, j] = completeRatios[completeRatios < 0.].shape[0]
                ratiosList[i].append(completeRatios)
                n_eff_ratio[i, j] = (alpha[i, j, 0] * n_eff_1 +
                                     alpha[i, j, 1] * n_eff_2)

                print('total eff: {0}'.format(n_eff_ratio[i, j]))
                if n_eff_ratio[i, j] > 0.05:
                    indices = np.logical_and(indices, completeRatios > 0.)
        print(indices[indices].shape[0])
        for i, cs in enumerate(csarray):
            for j, cs2 in enumerate(csarray2):

                completeRatios = ratiosList[i][j]
                completeRatios = completeRatios[indices]
                if not use_log:
                    norm = completeRatios[completeRatios != 0.].shape[0]
                    if n_eff_ratio[i, j] < 0.05:
                        # TODO: Harcoded number
                        decomposedLikelihood[i, j] = 20000
                    else:
                        decomposedLikelihood[
                            i, j] = -2. * np.log(completeRatios).sum()
                else:
                    decomposedLikelihood[i, j] = completeRatios.sum()
                    # NOTE(review): trueRatios here is the stale value from
                    # the last grid point of the previous loop; per-point
                    # true ratios are not stored. Confirm/fix before using
                    # the log path with true_dist.
                    trueLikelihood[i, j] = trueRatios.sum()
        # Replace the sentinel with the largest real value before shifting
        decomposedLikelihood[decomposedLikelihood == 20000] = decomposedLikelihood[
            decomposedLikelihood != 20000].max()
        decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
        decMin = np.unravel_index(
            decomposedLikelihood.argmin(), decomposedLikelihood.shape)

        # Plotting: pixel/contour diagnostics collected above. All share
        # the same layout, so drive them from a table.
        pixel_plots = [
            (n_eff_1s / n_eff_2s, 'n_eff_ratio', 'n_rat_1/n_rat_2 values for g1,g2'),
            (n_eff_ratio, 'n_eff', 'n_eff/n_tot sum values for g1,g2'),
            (n_eff_1s, 'n_eff1', 'n_eff_1 ratio values for g1,g2'),
            (n_eff_2s, 'n_eff2', 'n_eff_2 ratiovalues for g1,g2'),
            (alpha[:, :, 0], 'alpha1', 'weights_1 ratio values for g1,g2'),
            (alpha[:, :, 1], 'alpha2', 'weights_2 ratiovalues for g1,g2'),
            (n_tot_1s, 'n_tot1', 'n_tot_1 values for g1,g2'),
            (n_tot_2s, 'n_tot2', 'n_tot_2 values for g1,g2'),
            (n_zeros, 'n_zeros', 'n_zeros values for g1,g2'),
            (decomposedLikelihood, 'pixel_g1g2', 'Likelihood fit for g1,g2'),
        ]
        for values, plot_type, plot_title in pixel_plots:
            saveFig(csarray, [csarray2, values],
                    makePlotName('comp', 'train', type=plot_type),
                    labels=['composed'], pixel=True, marker=True,
                    dir=self.dir, model_g=self.model_g,
                    marker_value=(c1[0], c1[1]), print_pdf=True,
                    contour=True, title=plot_title)

        X, Y = np.meshgrid(csarray, csarray2)
        saveFig(X, [Y, decomposedLikelihood],
                makePlotName('comp', 'train',
                             type='multilikelihood_{0:.2f}_{1:.2f}'.format(
                                 self.F1_couplings[1], self.F1_couplings[2])),
                labels=['composed'], contour=True, marker=True, dir=self.dir,
                model_g=self.model_g,
                marker_value=(self.F1_couplings[1], self.F1_couplings[2]),
                print_pdf=True,
                min_value=(csarray[decMin[0]], csarray2[decMin[1]]))
        print([csarray[decMin[0]], csarray2[decMin[1]]])
        if true_dist:
            trueLikelihood = trueLikelihood - trueLikelihood.min()
            trueMin = np.unravel_index(
                trueLikelihood.argmin(), trueLikelihood.shape)
            # FIX: the original plot name referenced an undefined
            # 'n_sample' (NameError on this path); drop the suffix.
            saveFig(csarray, [decomposedLikelihood, trueLikelihood],
                    makePlotName('comp', 'train', type=post + 'likelihood'),
                    labels=['decomposed', 'true'], axis=['c1[0]', '-ln(L)'],
                    marker=True, dir=self.dir, marker_value=c1[0],
                    title='c1[0] Fitting', print_pdf=True)
            # FIX: original had a misplaced bracket,
            # csarray2[decMin[0], csarray2[decMin[1]]], which indexed a
            # 1-D array with a tuple instead of building the pair.
            return [[csarray[trueMin[0]], csarray2[trueMin[1]]],
                    [csarray[decMin[0]], csarray2[decMin[1]]]]
        else:
            return [[0., 0.], [csarray[decMin[0]], csarray2[decMin[1]]]]