Code example #1
def plotCValues(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
            c1_g='',model_g='mlp',true_dist=False,vars_g=None,
            workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
            use_log=False):
  if use_log == True:
    post = 'log'
  else:
    post = ''

  n_hist_c = 200
  keys = ['true','dec']
  c1_values = dict((key,np.zeros(n_hist_c)) for key in keys)
  c2_values = dict((key,np.zeros(n_hist_c)) for key in keys)
  c1_2 = np.loadtxt('{0}/fitting_values_c1c2{1}.txt'.format(dir,post))
  c1_values['true'] = c1_2[:,0]
  c1_values['dec'] = c1_2[:,1]
  c2_values['true'] = c1_2[:,2]
  c2_values['dec'] = c1_2[:,3]
  
  saveFig([],[c1_values['true'],c1_values['dec']], 
      makePlotName('c1c2','train',type='c1_hist{0}'.format(post)),hist=True, 
      axis=['signal weight'],marker=True,marker_value=c1[0],
      labels=['true','composed'],x_range=[0.,0.2],dir=dir,
      model_g=model_g,title='Histogram of estimated signal weight values',print_pdf=True)
  saveFig([],[c2_values['true'],c2_values['dec']], 
      makePlotName('c1c2','train',type='c2_hist{0}'.format(post)),hist=True, 
      axis=['bkg. weight'],marker=True,marker_value=c1[1],
      labels=['true','composed'],x_range=[0.1,0.4],dir=dir,
      model_g=model_g,title='Histogram of estimated bkg. weight values',print_pdf=True)
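A minimal usage sketch for the plotCValues above. The four-column layout of fitting_values_c1c2.txt is inferred from the slicing in the function body; the numbers and directory are made up, and the call stays commented out because saveFig and makePlotName live elsewhere in the repository.

import numpy as np

# Assumed 4-column layout of fitting_values_c1c2.txt:
#   col 0: true signal weight   col 1: decomposed signal weight
#   col 2: true bkg weight      col 3: decomposed bkg weight
demo = np.column_stack([
    np.random.normal(0.10, 0.010, 200),   # true c1
    np.random.normal(0.10, 0.015, 200),   # decomposed c1
    np.random.normal(0.30, 0.020, 200),   # true c2
    np.random.normal(0.30, 0.025, 200),   # decomposed c2
])
np.savetxt('fitting_values_c1c2.txt', demo)

# plotCValues(None, None, [0.1, 0.3], dir='.', model_g='mlp')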
Code example #2
def drawFigure1():
    """Draws Figure 1 (teaser diagram in the introduction)."""

    fig = plt.figure(figsize=(9, 4))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    sns.barplot(ax=ax1,
                x="footprint [GiB]",
                y="cs",
                order=["Uncompr", "ActualBestBaseMem", "ActualBestMem"],
                data=dfMemMorphStore.query("query == 'avg'"))
    sns.barplot(ax=ax2,
                x="runtime [s]",
                y="cs",
                order=["Uncompr", "ActualBestBasePerf", "ActualBestPerf"],
                data=dfPerfMorphStore.query("query == 'avg'"))
    for ax in [ax1, ax2]:
        ax.set_yticklabels([
            "No\ncompression\nat all", "Established\nbase data\ncompression",
            "Our novel\ncontinuous\ncompression"
        ])
        ax.set_ylabel(None)
    ax2.set_yticks([])
    fig.tight_layout()
    sns.despine()
    utils.saveFig("figure01_teaser")
Code example #3
def drawFigure9():
    """Draws Figure 9 (comparision of MorphStore and MonetDB)."""

    dfs = []

    if useMorphStore:
        df = dfPerfMorphStore.query(
            "cs in ['ActualBestPerf', 'Uncompr', 'UncomprScalar', 'ActualBestBasePerf']"
        )[["query", "ps", "cs", "runtime [s]"]].copy()
        df["candidate"] = df.apply(
            lambda row: "MorphStore {} {}".format(row["ps"], row["cs"]),
            axis=1)
        dfs.append(df)
    if useMonetDB:
        for intType in intTypesMonetDB:
            df = dfPerfMonetDB[intType]
            df["candidate"] = "MonetDB scalar {}".format(intType)
            dfs.append(df)

    dfComp = pd.concat(dfs)

    if useMorphStore:
        colors = [colorYellow, colorOrange, colorRed]
        order = [
            "MorphStore scalar UncomprScalar",
            "MorphStore {} Uncompr".format(psNames[processingStyle]),
            "MorphStore {} ActualBestPerf".format(psNames[processingStyle]),
        ]
        labels = [
            "MorphStore\nscalar\nuncompr.",
            "MorphStore\n{}\nuncompr.".format(psNames[processingStyle]),
            "MorphStore\n{}\ncontinuous compr.".format(
                psNames[processingStyle]),
        ]
    else:
        colors = []
        order = []
        labels = []
    if useMonetDB:
        colors = [colorCyan, *colors, colorBlue]
        order = [
            "MonetDB scalar BIGINT",
            *order,
            "MonetDB scalar tight",
        ]
        labels = [
            "MonetDB\nscalar\nuncompr.",
            *labels,
            "MonetDB\nscalar\nnarrow types",
        ]

    filename = "figure09_morphstore_vs_monetdb"

    _drawDia("candidate", order, colors, None, dfComp, 3.09)
    ax = plt.gca()
    ax.set_title(ax.get_title()[4:])  # strip the "(a) " prefix from the title
    utils.saveFig(filename)

    utils.drawLegendRect(labels, colors)
    utils.saveFig(filename + "_legend")
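utils.drawLegendRect, which produces the stand-alone legend saved as *_legend, is likewise not shown. A hypothetical sketch that renders a legend-only figure from colored patches (sizing and layout are guesses):

import matplotlib.pyplot as plt
from matplotlib.patches import Patch

def drawLegendRect(labels, colors):
    """Hypothetical: legend-only figure with one colored rectangle per label."""
    fig = plt.figure(figsize=(2.5 * len(labels), 0.6))
    handles = [Patch(facecolor=c, label=l) for l, c in zip(labels, colors)]
    fig.legend(handles=handles, loc="center", ncol=len(labels), frameon=False)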
Code example #4
def drawFigure7():
    """Draws Figure 7 (impact of the format combination)."""

    colors = [colorRed, colorGray, colorBlue, colorGreen]
    order = ["ActualWorst{}", "Uncompr", "StaticBP32", "ActualBest{}"]
    labels = [
        "worst combination", "uncompressed", "Static-BP-32", "best combination"
    ]

    filename = "figure07_ssb_formats"

    _drawDia("cs", order, colors, dfMemMorphStore, dfPerfMorphStore)
    utils.saveFig(filename)

    utils.drawLegendRect(labels, colors)
    utils.saveFig(filename + "_legend")
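_drawDia is shared by Figures 7 through 10 but its body is not included in these excerpts. The sketch below is hypothetical, reconstructed from the call sites: it assumes the first argument names the y-column, that a None memory frame skips the memory panel (as in Figure 9), and that each frame carries a 'query' column with an 'avg' row; panel titles and sizes are invented.

import matplotlib.pyplot as plt
import seaborn as sns

def _drawDia(yCol, order, colors, dfMem, dfPerf, height=4):
    """Hypothetical: paired bar charts of memory footprint and runtime,
    one bar per entry in order, colored by the given palette."""
    nPanels = 2 if dfMem is not None else 1
    fig, axes = plt.subplots(1, nPanels, figsize=(5 * nPanels, height))
    axes = list(axes) if nPanels == 2 else [axes]
    panels = []
    if dfMem is not None:
        panels.append((axes[0], dfMem, "footprint [GiB]", "(a) memory"))
    panels.append((axes[-1], dfPerf, "runtime [s]", "(b) runtime"))
    for ax, df, xCol, title in panels:
        sns.barplot(ax=ax, x=xCol, y=yCol, order=order, palette=colors,
                    data=df.query("query == 'avg'"))
        ax.set_title(title)
        ax.set_ylabel(None)
        ax.set_yticklabels([])  # category names go into the stand-alone legend
    sns.despine()
    fig.tight_layout()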
Code example #5
def drawFigure10():
    """Draws Figure 10 (fitness of our cost-based format selection)."""

    colors = [colorRed, colorGray, colorYellow, colorGreen]
    order = ["ActualWorst{}", "Uncompr", "CostBasedBest{}", "ActualBest{}"]
    labels = [
        "worst combination", "uncompressed", "cost-based", "best combination"
    ]

    filename = "figure10_opt"

    _drawDia("cs", order, colors, dfMemMorphStore, dfPerfMorphStore)
    utils.saveFig(filename)

    utils.drawLegendRect(labels, colors)
    utils.saveFig(filename + "_legend")
Code example #6
def drawFigure8():
    """Draws Figure 8 (compression of base data vs. intermediates)."""

    colors = [colorGray, colorCyan, colorYellow]
    order = ["Uncompr", "ActualBestBase{}", "ActualBest{}"]
    labels = [
        "uncompressed", "+ compressed base columns",
        "+ compressed intermediates"
    ]

    filename = "figure08_ssb_base_vs_interm"

    _drawDia("cs", order, colors, dfMemMorphStore, dfPerfMorphStore)
    utils.saveFig(filename)

    utils.drawLegendRect(labels, colors)
    utils.saveFig(filename + "_legend")
Code example #7
def drawFigure5(dfMea):
    """
    Draws Figure 5 (experiment on a single on-the-fly de/re-compression
    operator)
    """

    # Create the main figure.
    fig = plt.figure(figsize=(10, 4))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    # Plot the data.
    for diaIdx, (ax, sel) in enumerate([
        (ax1, 0.01),
        (ax2, 0.9),
    ]):
        sns.swarmplot(ax=ax,
                      y="runtime [ms]",
                      x="col",
                      hue="class",
                      hue_order=["alluncompr", "outuncompr", "outcompr"],
                      palette=["red", "blue", "silver"],
                      data=dfMea.query("sel == {}".format(sel)))
        ax.set_title("({}) {:.0%} selectivity".format(chr(ord("a") + diaIdx),
                                                      sel))
        ax.set_xlabel("input column")
        ax.set_ylim(bottom=0)
        ax.get_legend().remove()

    # Some post-processing.
    ax2.set_ylabel(None)
    sns.despine()
    fig.tight_layout()

    filename = "figure5_singleop"

    # Save the main figure.
    utils.saveFig(filename)

    utils.drawLegendMarker([
        "uncompressed", "only input compressed", "input and output compressed"
    ], ["red", "blue", "silver"])
    utils.saveFig(filename + "_legend")
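utils.drawLegendMarker presumably mirrors drawLegendRect with point markers instead of rectangles, matching the swarmplot palette above; a hypothetical sketch using Line2D handles:

import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

def drawLegendMarker(labels, colors):
    """Hypothetical: legend-only figure with one colored point marker per label."""
    fig = plt.figure(figsize=(2.5 * len(labels), 0.6))
    handles = [Line2D([], [], linestyle="", marker="o", color=c, label=l)
               for l, c in zip(labels, colors)]
    fig.legend(handles=handles, loc="center", ncol=len(labels), frameon=False)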
Code example #8
def plotCValues(c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
            c1_g='',model_g='mlp',true_dist=False,vars_g=None,
            workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
            use_log=False, n_hist=150,c_eval=0, range_min=-1.0,range_max=0.):
  if use_log == True:
    post = 'log'
  else:
    post = ''

  keys = ['true','dec']
  c1_ = dict((key,np.zeros(n_hist)) for key in keys)
  c1_values = dict((key,np.zeros(n_hist)) for key in keys)
  c2_values = dict((key,np.zeros(n_hist)) for key in keys)
  c1_1 = np.loadtxt('{0}/fitting_values_c1.txt'.format(dir))  
  c1_['true'] = c1_1[:,0]
  c1_['dec'] = c1_1[:,1]
  if true_dist == True:
    vals = [c1_['true'],c1_['dec']]
    labels = ['true','dec']
  else:
    vals = c1_['dec']
    labels = ['dec']
  vals1 = c1_1[:,3]
  #vals = vals[vals != 0.5]
  #vals = vals[vals != 1.4]
  #vals1 = vals1[vals1 != 1.1]
  #vals1 = vals1[vals1 != 1.7]
  #saveFig([],[vals1], 
  #    makePlotName('g2','train',type='hist_g1g2'),hist=True, 
  #    axis=['g2'],marker=True,marker_value=c1[c_eval],
  #    labels=labels,x_range=[range_min,range_max],dir=dir,
  #    model_g=model_g,title='Histogram for fitted g2', print_pdf=True)
  saveFig([],[vals,vals1], 
      makePlotName('g1g2','train',type='hist'),hist=True,hist2D=True, 
      axis=['g1','g2'],marker=True,marker_value=c1,
      labels=labels,dir=dir,model_g=model_g,title='2D Histogram for fitted g1,g2', print_pdf=True,
      x_range=[[0.5,1.4],[1.1,1.9]])
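Example #8 hands the two fitted-coupling arrays to saveFig with hist2D=True. A self-contained sketch of the same 2D-histogram idea in plain matplotlib, with toy stand-ins for the fitted (g1, g2) values and the marker placed at an assumed true point:

import numpy as np
import matplotlib.pyplot as plt

# Toy stand-ins for the fitted g1/g2 values loaded from fitting_values_c1.txt
g1 = np.random.normal(1.0, 0.1, 150)
g2 = np.random.normal(1.5, 0.1, 150)

fig, ax = plt.subplots()
ax.hist2d(g1, g2, bins=30, range=[[0.5, 1.4], [1.1, 1.9]])
ax.plot(1.0, 1.5, marker='*', color='red', markersize=15)  # assumed true value
ax.set_xlabel('g1')
ax.set_ylabel('g2')
ax.set_title('2D histogram for fitted g1, g2')
fig.savefig('hist_g1g2.png')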
Code example #9
def drawFigure6(dfMea):
    """Draws Figure 6 (experiment on a simple query)"""

    colors = ["#bfbfbf", "#7cc8ec", "#868ad1", "#f8d35e", "#f47264"]

    # Create the main figure.
    fig = plt.figure(figsize=(10, 4))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    filename = "figure6_simplequery"

    # For the memory footprints.
    _drawStackedBars(dfMea, ["inDataX", "inDataY", "midPosXC", "midDataYC"],
                     " [GiB]", "memory footprint", "column", {
                         "inDataX": "X",
                         "inDataY": "Y",
                         "midPosXC": "X'",
                         "midDataYC": "Y'",
                     }, "a", ax1, colors)
    # For the runtimes.
    _drawStackedBars(dfMea, ["select", "project", "agg_sum"], " [s]",
                     "runtime", "operator", {
                         "select": "select",
                         "project": "project",
                         "agg_sum": "sum",
                     }, "b", ax2, colors)

    fig.tight_layout()
    utils.saveFig(filename)

    # Create the stand-alone legend.
    utils.drawLegendRect([
        "uncompr.\nuncompr.", "uncompr.\nstatic BP", "static BP\nstatic BP",
        "DELTA + SIMD-BP\nstatic BP", "FOR + SIMD-BP\nstatic BP"
    ], colors)
    utils.saveFig(filename + "_legend")
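_drawStackedBars is also not included in the excerpts. A stand-alone sketch of the stacked-bar idea it implements, using pandas' built-in stacked barh on toy per-column footprints (all names and numbers are illustrative):

import pandas as pd
import matplotlib.pyplot as plt

# Toy footprints [GiB] per base/intermediate column, one row per variant.
df = pd.DataFrame(
    {"X": [4.0, 1.2, 0.9], "Y": [4.0, 1.5, 1.1],
     "X'": [2.0, 2.0, 0.4], "Y'": [3.0, 3.0, 0.6]},
    index=["uncompr.", "static BP", "best"])

ax = df.plot(kind="barh", stacked=True, figsize=(6, 3))
ax.set_xlabel("memory footprint [GiB]")
ax.set_title("(a) memory footprint by column")
plt.tight_layout()
plt.savefig("stacked_demo.png")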
Code example #10
def CrossSectionCheck2D(dir,c1_g,model_g,data_files,f1_dist,accept_list,c_min,c_max,npoints,n_eval,feature):
  ''' 
    2D likelihood plots for a single feature
  '''

  # 2D version
  csarray = np.linspace(c_min[0],c_max[0],npoints)
  csarray2 = np.linspace(c_min[1], c_max[1], npoints)

  all_indexes = np.loadtxt('3indexes_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(c_min[0],c_min[1],c_max[0],c_max[1],npoints)) 
  all_indexes = np.array([int(x) for x in all_indexes])
  all_couplings = np.loadtxt('3couplings_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(c_min[0],c_min[1],c_max[0],c_max[1],npoints)) 
  all_cross_sections = np.loadtxt('3crosssection_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(c_min[0],c_min[1],c_max[0],c_max[1],npoints))

  basis_files = [data_files[i] for i in all_indexes]
  samplesdata = []
  data_file='data'
  for i,sample in enumerate(basis_files):
    samplesdata.append(np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,'mlp',c1_g,data_file,sample)))

  print all_indexes
  targetdata = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,'mlp',c1_g,data_file,f1_dist))
 
  likelihoods = np.zeros((npoints,npoints))
  n_effs = np.zeros((npoints,npoints))
  n_zeros = np.zeros((npoints,npoints))

  for k,cs in enumerate(csarray):
    for j,cs2 in enumerate(csarray2):
      likelihood,n_eff,n_zero = checkCrossSection(all_couplings[k*npoints+j],all_cross_sections[k*npoints + j],basis_files,f1_dist,
              dir,c1_g,model_g,feature=feature,targetdata=targetdata,samplesdata=samplesdata)
      likelihoods[k,j] = likelihood
      n_effs[k,j] = n_eff
      n_zeros[k,j] = n_zero
  #print likelihoods
  saveFig(csarray,[csarray2,likelihoods],makePlotName('feature{0}'.format(feature),'train',type='pixel_g1g2'),labels=['composed'],pixel=True,marker=True,dir=dir,model_g=model_g,marker_value=(1.0,0.5),print_pdf=True,contour=True,title='Feature for g1,g2')
Code example #11
  def evalC1C2Likelihood(self,w,testdata,c0,c1,c_eval=0,c_min=0.01,c_max=0.2,use_log=False,true_dist=False, vars_g=None, npoints=50,samples_ids=None,weights_func=None):

    if true_dist == True:
      vars = ROOT.TList()
      for var in vars_g:
        vars.Add(w.var(var))
      x = ROOT.RooArgSet(vars)
    else:
      x = None

    score = ROOT.RooArgSet(w.var('score'))
    if use_log == True:
      evaluateRatio = self.evaluateLogDecomposedRatio
      post = 'log'
    else:
      evaluateRatio = self.evaluateDecomposedRatio
      post = ''

    csarray = np.linspace(c_min[0],c_max[0],npoints)
    csarray2 = np.linspace(c_min[1], c_max[1], npoints)
    decomposedLikelihood = np.zeros((npoints,npoints))
    trueLikelihood = np.zeros((npoints,npoints))
    c1s = np.zeros(c0.shape[0])
    pre_pdf = []
    pre_dist = []
    pre_pdf.extend([[],[]])
    pre_dist.extend([[],[]])
    # TODO: change these enumerations
    for k,c0_ in enumerate(c0):
      pre_pdf[0].append([])
      pre_pdf[1].append([])
      pre_dist[0].append([])
      pre_dist[1].append([])
      for j,c1_ in enumerate(c0):
        index_k,index_j = (self.basis_indexes[k],self.basis_indexes[j])
        if k != j:
          f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j))
          f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j))
          data = testdata
          if self.preprocessing == True:
            data = preProcessing(testdata,self.dataset_names[min(index_k,index_j)],
            self.dataset_names[max(index_k,index_j)],self.scaler) 
          outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,
          self.c1_g,self.model_file,index_k,index_j),data,model_g=self.model_g, clf=self.clf)
          f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
          f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
          pre_pdf[0][k].append(f0pdfdist)
          pre_pdf[1][k].append(f1pdfdist)
        else:
          pre_pdf[0][k].append(None)
          pre_pdf[1][k].append(None)
        if true_dist == True:
          f0 = w.pdf('f{0}'.format(k))
          f1 = w.pdf('f{0}'.format(j))
          if len(testdata.shape) > 1:
            f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata])
            f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata])
          else:
            f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata])
            f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata])
          pre_dist[0][k].append(f0dist)
          pre_dist[1][k].append(f1dist)
    indices = np.ones(testdata.shape[0], dtype=bool)
    ratiosList = []
    samples = []
    # This is needed for calibration of full ratios
    #for i,sample in enumerate(self.dataset_names):
    #  samples.append(np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(self.dir,'mlp',self.c1_g,'data',sample)))
    n_eff_ratio = np.zeros((csarray.shape[0], csarray2.shape[0]))
    for i,cs in enumerate(csarray):
      ratiosList.append([])
      for j, cs2 in enumerate(csarray2):
        if weights_func is not None:
          c1s = weights_func(cs,cs2)
          #print '{0} {1}'.format(cs,cs2)
          #print c1s
        else:
          c1s[:] = c1[:]
          c1s[c_eval] = cs
        if self.cross_section is not None:
          c1s = np.multiply(c1s,self.cross_section)
        n_eff = c1s.sum()
        n_tot = np.abs(c1s).sum()
        n_eff_ratio[i,j] = n_eff/n_tot 
        #print '{0} {1}'.format(i,j)
        #print 'n_eff: {0}, n_tot: {1}, n_eff/n_tot: {2}'.format(n_eff, n_tot, n_eff/n_tot)
        c1s = c1s/c1s.sum()
        #print c1s
        decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x,
        plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_dist=pre_dist,
        pre_evaluation=pre_pdf)
        decomposedRatios = 1./decomposedRatios
        #calibratedRatios = self.calibrateFullRatios(w, decomposedRatios,
        #    c0,c1s,debug=debug,samples_data=samples,index=i) 
        #saveFig(decomposedRatios2, [calibratedRatios], makePlotName('calibrated_{0}'.format(i),'ratio',type='scat',
        #dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['composed ratio', 
        #'composed calibrated'], dir=self.dir, model_g=self.model_g)
        ratiosList[i].append(decomposedRatios)
        #print('{0} {1} '.format(i,j)),
        #print decomposedRatios[decomposedRatios < 0.].shape 
        #print c1s
        #indices = np.logical_and(indices, decomposedRatios > 0.)
    for i,cs in enumerate(csarray):
      for j, cs2 in enumerate(csarray2):
        decomposedRatios = ratiosList[i][j]
        if use_log == False:
          if samples_ids is not None:
            ratios = decomposedRatios
            ids = samples_ids
            decomposedLikelihood[i,j] = (np.dot(np.log(ratios),
                np.array([c1[x] for x in ids]))).sum()
          else:
            #decomposedRatios[decomposedRatios < 0.] = 0.9
            decomposedRatios[decomposedRatios < 0.] = 1.0
            #decomposedRatios = decomposedRatios[self.findOutliers(decomposedRatios)]
            if n_eff_ratio[i,j] <= 0.5:
              # TODO: hardcoded number
              decomposedLikelihood[i,j] = 20000
            else:
              decomposedLikelihood[i,j] = -np.log(decomposedRatios).sum()
            #print decomposedLikelihood[i,j]
            #print '{0} {1} {2}'.format(i,j,decomposedLikelihood[i,j])
          trueLikelihood[i,j] = -np.log(trueRatios).sum()
        else:
          decomposedLikelihood[i,j] = decomposedRatios.sum()
          trueLikelihood[i,j] = trueRatios.sum()
      #print '\n {0}'.format(i)
    decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
    decMin = np.unravel_index(decomposedLikelihood.argmin(), decomposedLikelihood.shape)
    # pixel plots
    #saveFig(csarray,[csarray2,decomposedLikelihood],makePlotName('comp','train',type='likelihood_g1g2'),labels=['composed'],pixel=True,marker=True,dir=self.dir,model_g=self.model_g,marker_value=(c1[0],c1[1]),print_pdf=True,contour=True,title='Likelihood fit for g1,g2')

    #decMin = [np.sum(decomposedLikelihood,1).argmin(),np.sum(decomposedLikelihood,0).argmin()] 
    X,Y = np.meshgrid(csarray, csarray2)

    saveFig(X,[Y,decomposedLikelihood],makePlotName('comp','train',type='multilikelihood'),labels=['composed'],contour=True,marker=True,dir=self.dir,model_g=self.model_g,marker_value=(c1[0],c1[1]),print_pdf=True,min_value=(csarray[decMin[0]],csarray2[decMin[1]]))
    #print decMin
    print [csarray[decMin[0]],csarray2[decMin[1]]]
    if true_dist == True:
      trueLikelihood = trueLikelihood - trueLikelihood.min()
      trueMin = np.unravel_index(trueLikelihood.argmin(), trueLikelihood.shape)
      saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood_{0}'.format(n_sample)),labels=['decomposed','true'],axis=['c1[0]','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[0],title='c1[0] Fitting',print_pdf=True)
      return [[csarray[trueMin[0]],csarray2[trueMin[1]]],
          [csarray[decMin[0]],csarray2[decMin[1]]]]
    else:
      return [[0.,0.],[csarray[decMin[0]],csarray2[decMin[1]]]]
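For reference, evalC1C2Likelihood leans on the mixture-model ratio decomposition: both hypotheses are mixtures of the same components p_i(x), so the full likelihood ratio factors into pairwise ratios, each approximable by one classifier per (i, j) pair (the bkghistpdf_{i}_{j}/sighistpdf_{i}_{j} functions above). Written out, assuming this is the intended identity:

p(x \mid c) = \sum_i c_i \, p_i(x),
\qquad
\frac{p(x \mid c_1)}{p(x \mid c_0)}
  = \sum_i c_{1,i} \, \frac{p_i(x)}{\sum_j c_{0,j} \, p_j(x)}
  = \sum_i c_{1,i} \Bigl[\, \sum_j c_{0,j} \, \frac{p_j(x)}{p_i(x)} \Bigr]^{-1}

The grid scan then minimizes the shifted negative log-likelihood, -\ln L(c) = -\sum_x \ln r(x; c, c_0), which corresponds to the decomposedLikelihood[i,j] = -np.log(decomposedRatios).sum() line above.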
Code example #12
    def evalDoubleC1C2Likelihood(
            self,
            w,
            testdata,
            c0,
            c1,
            c_eval=0,
            c_min=0.01,
            c_max=0.2,
            use_log=False,
            true_dist=False,
            vars_g=None,
            npoints=50,
            samples_ids=None,
            weights_func=None):
        '''
          Find minimum of likelihood on testdata using decomposed
          ratios and the weighted orthogonal morphing method to find the bases
        '''

        if true_dist:
            vars = ROOT.TList()
            for var in vars_g:
                vars.Add(w.var(var))
            x = ROOT.RooArgSet(vars)
        else:
            x = None

        score = ROOT.RooArgSet(w.var('score'))
        if use_log:
            evaluateRatio = self.evaluateLogDecomposedRatio
            post = 'log'
        else:
            evaluateRatio = self.evaluateDecomposedRatio
            post = ''

        # Compute bases if they don't exist for this range
        if not os.path.isfile(
            '3doubleindexes_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(
                c_min[0],
                c_min[1],
                c_max[0],
                c_max[1],
                npoints)):
            self.pre2DDoubleBasis(c_min=c_min, c_max=c_max, npoints=npoints)

        csarray = np.linspace(c_min[0], c_max[0], npoints)
        csarray2 = np.linspace(c_min[1], c_max[1], npoints)
        decomposedLikelihood = np.zeros((npoints, npoints))
        trueLikelihood = np.zeros((npoints, npoints))

        all_indexes = np.loadtxt(
            '3doubleindexes_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(
                c_min[0], c_min[1], c_max[0], c_max[1], npoints))
        all_indexes = np.array([[int(x) for x in rows]
                                for rows in all_indexes])
        all_couplings = np.loadtxt(
            '3doublecouplings_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(
                c_min[0], c_min[1], c_max[0], c_max[1], npoints))
        all_cross_sections = np.loadtxt(
            '3doublecrosssection_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(
                c_min[0], c_min[1], c_max[0], c_max[1], npoints))

        # Bkg used in the fit
        # TODO: hardcoded, this has to be changed
        basis_value = 1

        n_eff_ratio = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_eff_1s = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_eff_2s = np.zeros((csarray.shape[0], csarray2.shape[0]))

        # Pre evaluate the values for each distribution
        pre_pdf = [[range(self.nsamples) for _ in range(self.nsamples)], [
            range(self.nsamples) for _ in range(self.nsamples)]]
        pre_dist = [[range(self.nsamples) for _ in range(self.nsamples)], [
            range(self.nsamples) for _ in range(self.nsamples)]]
        # Only precompute distributions that will be used
        unique_indexes = set()
        for indexes in all_indexes:
            unique_indexes |= set(indexes)
        # TODO: change these enumerations
        unique_indexes = list(unique_indexes)
        for k in range(len(unique_indexes)):
            for j in range(len(unique_indexes)):
                index_k, index_j = (unique_indexes[k], unique_indexes[j])
                # This saves some time by only evaluating the needed samples
                if index_k != basis_value:
                    continue
                print 'Pre computing {0} {1}'.format(index_k, index_j)
                if k != j:
                    f0pdf = w.function(
                        'bkghistpdf_{0}_{1}'.format(
                            index_k, index_j))
                    f1pdf = w.function(
                        'sighistpdf_{0}_{1}'.format(
                            index_k, index_j))
                    data = testdata
                    if self.preprocessing:
                        data = preProcessing(testdata, self.dataset_names[min(
                            k, j)], self.dataset_names[max(k, j)], self.scaler)
                    # outputs =
                    # predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,
                    outputs = predict(
                        '/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(
                            self.model_file, index_k, index_j), data, model_g=self.model_g)
                    f0pdfdist = np.array(
                        [self.evalDist(score, f0pdf, [xs]) for xs in outputs])
                    f1pdfdist = np.array(
                        [self.evalDist(score, f1pdf, [xs]) for xs in outputs])
                    pre_pdf[0][index_k][index_j] = f0pdfdist
                    pre_pdf[1][index_k][index_j] = f1pdfdist
                else:
                    pre_pdf[0][index_k][index_j] = None
                    pre_pdf[1][index_k][index_j] = None
                if true_dist:
                    f0 = w.pdf('f{0}'.format(index_k))
                    f1 = w.pdf('f{0}'.format(index_j))
                    if len(testdata.shape) > 1:
                        f0dist = np.array([self.evalDist(x, f0, xs)
                                           for xs in testdata])
                        f1dist = np.array([self.evalDist(x, f1, xs)
                                           for xs in testdata])
                    else:
                        f0dist = np.array([self.evalDist(x, f0, [xs])
                                           for xs in testdata])
                        f1dist = np.array([self.evalDist(x, f1, [xs])
                                           for xs in testdata])
                    pre_dist[0][index_k][index_j] = f0dist
                    pre_dist[1][index_k][index_j] = f1dist

        indices = np.ones(testdata.shape[0], dtype=bool)
        ratiosList = []
        samples = []
        # Useful values to inspect after the training
        alpha = np.zeros([csarray.shape[0], csarray2.shape[0], 2])
        n_eff_ratio = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_eff_1s = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_eff_2s = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_tot_1s = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_tot_2s = np.zeros((csarray.shape[0], csarray2.shape[0]))
        n_zeros = np.zeros((npoints, npoints))
        target = self.F1_couplings[:]

        def compute_one_alpha_part(weights, xs):
            c1s_1 = np.multiply(weights,xs)
            c1s_1 = np.multiply(weights,c1s_1)
            alpha1 = c1s_1.sum()
            return alpha1
        
        exp_basis_weights = True
        for i, cs in enumerate(csarray):
            ratiosList.append([])
            for j, cs2 in enumerate(csarray2):
                target[1] = cs
                target[2] = cs2
                print '{0} {1}'.format(i, j)
                print target

                # Compute F1 couplings and cross sections
                c1s_1 = all_couplings[i * npoints + j]
                cross_section_1 = all_cross_sections[i * npoints + j]
                c1s_1 = np.multiply(c1s_1, cross_section_1)
                n_eff = c1s_1.sum()
                n_tot = np.abs(c1s_1).sum()
                n_eff_1 = n_eff / n_tot
                n_eff_1s[i, j] = n_eff_1
                n_tot_1s[i, j] = n_tot
                print 'n_eff 1: {0}'.format(n_eff / n_tot)
                c1s_1 = c1s_1 / c1s_1.sum()

                c1s_2 = all_couplings[npoints * npoints + i * npoints + j]
                cross_section_2 = all_cross_sections[
                    npoints * npoints + i * npoints + j]
                c1s_2 = np.multiply(c1s_2, cross_section_2)
                n_eff = c1s_2.sum()
                n_tot = np.abs(c1s_2).sum()
                n_eff_2 = n_eff / n_tot
                n_eff_2s[i, j] = n_eff_2
                n_tot_2s[i, j] = n_tot
                print 'n_eff 2: {0}'.format(n_eff / n_tot)
                c1s_2 = c1s_2 / c1s_2.sum()

                if exp_basis_weights == True:
                    neff2 = 1./n_eff_2
                    neff1 = 1./n_eff_1
                    #alpha1 = np.exp(-np.sqrt(neff1))
                    #alpha2 = np.exp(-np.sqrt(neff2))
                    alpha1 = np.exp(-neff1**(1./3.))
                    alpha2 = np.exp(-neff2**(1./3.))
                    alpha[i,j,0] = alpha1/(alpha1 + alpha2)
                    alpha[i,j,1] = alpha2/(alpha1 + alpha2)
                else:
                    alpha1 = compute_one_alpha_part(all_couplings[i*npoints + j],
                                                    all_cross_sections[i*npoints + j])
                    alpha2 = compute_one_alpha_part(all_couplings[npoints*npoints 
                             + i*npoints + j], all_cross_sections[npoints*npoints + i*npoints + j])
                            
                    alpha[i,j,0] = (1/2.)*(alpha2/(alpha1+alpha2))
                    alpha[i,j,1] = (1/2.)*(alpha1/(alpha1+alpha2))


                # Compute Bkg weights
                c0_arr_1 = np.zeros(15)
                c0_arr_2 = np.zeros(15)
                c0_arr_1[np.where(all_indexes[0] == basis_value)[0][0]] = 1.
                c0_arr_2[np.where(all_indexes[1] == basis_value)[0][0]] = 1.

                c0_arr_1 = c0_arr_1 / c0_arr_1.sum()
                c0_arr_2 = c0_arr_2 / c0_arr_2.sum()

                c1s = np.append(alpha[i, j, 0] * c1s_1, alpha[i, j, 1] * c1s_2)
                c0_arr = np.append(0.5 * c0_arr_1, 0.5 * c0_arr_2)

                print c0_arr

                cross_section = np.append(cross_section_1, cross_section_2)
                indexes = np.append(all_indexes[0], all_indexes[1])
                completeRatios, trueRatios = evaluateRatio(w, testdata, x=x,
                                                           plotting=False, roc=False, c0arr=c0_arr, c1arr=c1s, true_dist=true_dist,
                                                           pre_dist=pre_dist, pre_evaluation=pre_pdf, cross_section=cross_section,
                                                           indexes=indexes)
                completeRatios = 1. / completeRatios

                print completeRatios[completeRatios < 0.].shape
                n_zeros[i, j] = completeRatios[completeRatios < 0.].shape[0]
                ratiosList[i].append(completeRatios)
                n_eff_ratio[i,j] = (alpha[i,j,0] * n_eff_1 +
                    alpha[i,j,1] * n_eff_2)

                print 'total eff: {0}'.format(n_eff_ratio[i, j])
                if n_eff_ratio[i, j] > 0.05:
                    indices = np.logical_and(indices, completeRatios > 0.)
        print indices[indices].shape[0]
        for i, cs in enumerate(csarray):
            for j, cs2 in enumerate(csarray2):

                completeRatios = ratiosList[i][j]
                completeRatios = completeRatios[indices]
                if not use_log:
                    norm = completeRatios[completeRatios != 0.].shape[0]
                    if n_eff_ratio[i, j] < 0.05:
                        # TODO: hardcoded number
                        decomposedLikelihood[i, j] = 20000
                    else:
                        decomposedLikelihood[
                            i, j] = -2.*np.log(completeRatios).sum()
                else:
                    decomposedLikelihood[i, j] = completeRatios.sum()
                    trueLikelihood[i, j] = trueRatios.sum()
        decomposedLikelihood[decomposedLikelihood == 20000] = decomposedLikelihood[
            decomposedLikelihood != 20000].max()
        decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
        decMin = np.unravel_index(
            decomposedLikelihood.argmin(),
            decomposedLikelihood.shape)

        # Plotting
        # pixel plots
        saveFig(csarray,
                [csarray2,
                 n_eff_1s / n_eff_2s],
                makePlotName('comp',
                             'train',
                             type='n_eff_ratio'),
                labels=['composed'],
                pixel=True,
                marker=True,
                dir=self.dir,
                model_g=self.model_g,
                marker_value=(c1[0],
                              c1[1]),
                print_pdf=True,
                contour=True,
                title='n_rat_1/n_rat_2 values for g1,g2')

        saveFig(csarray,
                [csarray2,
                 n_eff_ratio],
                makePlotName('comp',
                             'train',
                             type='n_eff'),
                labels=['composed'],
                pixel=True,
                marker=True,
                dir=self.dir,
                model_g=self.model_g,
                marker_value=(c1[0],
                              c1[1]),
                print_pdf=True,
                contour=True,
                title='n_eff/n_tot sum values for g1,g2')

        saveFig(csarray,
                [csarray2,
                 n_eff_1s],
                makePlotName('comp',
                             'train',
                             type='n_eff1'),
                labels=['composed'],
                pixel=True,
                marker=True,
                dir=self.dir,
                model_g=self.model_g,
                marker_value=(c1[0],
                              c1[1]),
                print_pdf=True,
                contour=True,
                title='n_eff_1 ratio values for g1,g2')

        saveFig(csarray,
                [csarray2,
                 n_eff_2s],
                makePlotName('comp',
                             'train',
                             type='n_eff2'),
                labels=['composed'],
                pixel=True,
                marker=True,
                dir=self.dir,
                model_g=self.model_g,
                marker_value=(c1[0],
                              c1[1]),
                print_pdf=True,
                contour=True,
                title='n_eff_2 ratio values for g1,g2')

        saveFig(csarray,
                [csarray2,
                 alpha[:,
                       :,
                       0]],
                makePlotName('comp',
                             'train',
                             type='alpha1'),
                labels=['composed'],
                pixel=True,
                marker=True,
                dir=self.dir,
                model_g=self.model_g,
                marker_value=(c1[0],
                              c1[1]),
                print_pdf=True,
                contour=True,
                title='weights_1 ratio values for g1,g2')

        saveFig(csarray,
                [csarray2,
                 alpha[:,
                       :,
                       1]],
                makePlotName('comp',
                             'train',
                             type='alpha2'),
                labels=['composed'],
                pixel=True,
                marker=True,
                dir=self.dir,
                model_g=self.model_g,
                marker_value=(c1[0],
                              c1[1]),
                print_pdf=True,
                contour=True,
                title='weights_2 ratio values for g1,g2')

        saveFig(csarray,
                [csarray2,
                 n_tot_1s],
                makePlotName('comp',
                             'train',
                             type='n_tot1'),
                labels=['composed'],
                pixel=True,
                marker=True,
                dir=self.dir,
                model_g=self.model_g,
                marker_value=(c1[0],
                              c1[1]),
                print_pdf=True,
                contour=True,
                title='n_tot_1 values for g1,g2')

        saveFig(csarray,
                [csarray2,
                 n_tot_2s],
                makePlotName('comp',
                             'train',
                             type='n_tot2'),
                labels=['composed'],
                pixel=True,
                marker=True,
                dir=self.dir,
                model_g=self.model_g,
                marker_value=(c1[0],
                              c1[1]),
                print_pdf=True,
                contour=True,
                title='n_tot_2 values for g1,g2')

        saveFig(csarray,
                [csarray2,
                 n_zeros],
                makePlotName('comp',
                             'train',
                             type='n_zeros'),
                labels=['composed'],
                pixel=True,
                marker=True,
                dir=self.dir,
                model_g=self.model_g,
                marker_value=(c1[0],
                              c1[1]),
                print_pdf=True,
                contour=True,
                title='n_zeros values for g1,g2')

        saveFig(csarray,
                [csarray2,
                 decomposedLikelihood],
                makePlotName('comp',
                             'train',
                             type='pixel_g1g2'),
                labels=['composed'],
                pixel=True,
                marker=True,
                dir=self.dir,
                model_g=self.model_g,
                marker_value=(c1[0],
                              c1[1]),
                print_pdf=True,
                contour=True,
                title='Likelihood fit for g1,g2')

        #decMin = [np.sum(decomposedLikelihood,1).argmin(),np.sum(decomposedLikelihood,0).argmin()]
        X, Y = np.meshgrid(csarray, csarray2)

        saveFig(
            X,
            [
                Y,
                decomposedLikelihood],
            makePlotName(
                'comp',
                'train',
                type='multilikelihood_{0:.2f}_{1:.2f}'.format(
                    self.F1_couplings[1],
                    self.F1_couplings[2])),
            labels=['composed'],
            contour=True,
            marker=True,
            dir=self.dir,
            model_g=self.model_g,
            marker_value=(
                self.F1_couplings[1],
                self.F1_couplings[2]),
            print_pdf=True,
            min_value=(
                csarray[
                    decMin[0]],
                csarray2[
                    decMin[1]]))
        # print decMin
        print [csarray[decMin[0]], csarray2[decMin[1]]]
        if true_dist:
            trueLikelihood = trueLikelihood - trueLikelihood.min()
            trueMin = np.unravel_index(
                trueLikelihood.argmin(), trueLikelihood.shape)
            saveFig(csarray,
                    [decomposedLikelihood,
                     trueLikelihood],
                    makePlotName('comp',
                                 'train',
                                 type=post + 'likelihood_{0}'.format(n_sample)),
                    labels=['decomposed',
                            'true'],
                    axis=['c1[0]',
                          '-ln(L)'],
                    marker=True,
                    dir=self.dir,
                    marker_value=c1[0],
                    title='c1[0] Fitting',
                    print_pdf=True)
            return [[csarray[trueMin[0]], csarray2[trueMin[1]]],
                    [csarray[decMin[0]], csarray2[decMin[1]]]]
        else:
            return [[0., 0.], [csarray[decMin[0]], csarray2[decMin[1]]]]
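The n_eff/n_tot guard that appears in both likelihood scans measures how much cancellation the signed morphing weights induce: n_eff = sum(c) and n_tot = sum(|c|), and grid points where the ratio is small are flagged as numerically unreliable (hence the hardcoded 20000 penalty). A tiny illustration with made-up weights:

import numpy as np

c = np.array([0.8, -0.6, 0.5, -0.4])   # illustrative signed morphing weights
n_eff = c.sum()                        # signed sum: 0.3
n_tot = np.abs(c).sum()                # total magnitude: 2.3
print(n_eff / n_tot)                   # ~0.13 -> fails the 0.5 cut in example #11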
Code example #13
def drawFigure4(dfMea, selectivity):
    """Draws Figure 4 (experiment on operator classes)"""

    fig = plt.figure(figsize=(7.5, 3.5))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    dfUse = dfMea.query("sel == {}".format(selectivity))

    rtUncompr = dfUse.query(
        "operator_class == 'uncompressed'")["runtime [ms]"].mean()
    rtOtfDrc = dfUse.query(
        "operator_class == 'otf de/re-compression'")["runtime [ms]"].mean()
    rtSpecOp = dfUse.query(
        "operator_class == 'specialized'")["runtime [ms]"].mean()
    rtOtfMor = dfUse.query(
        "operator_class == 'otf morphing'")["runtime [ms]"].mean()

    # Number for the text
    if False:
        print("speedup OtfDrc vs. Uncompr: {}".format(rtUncompr / rtOtfDrc))
        print("speedup SpecOp vs. OtfDrc: {}".format(rtOtfDrc / rtSpecOp))
        print("slowdown OtfMor vs. SpecOp: {}".format(rtOtfMor / rtSpecOp))

    sns.barplot(
        ax=ax1,
        x="runtime [ms]",
        y="operator_class_long",
        order=[VAR_UU, VAR_OTFDRC, VAR_SPEC, VAR_OTFM],
        data=dfUse,
        ci=None,
    )
    ax1.set_ylabel(None)
    runtimeCap = 75
    ax1.set_xlim(right=runtimeCap)
    ax1.text(runtimeCap,
             0,
             "{:.0f} ms → ".format(rtUncompr),
             horizontalalignment="right",
             verticalalignment="center",
             color="white",
             fontsize=20)

    sns.barplot(
        ax=ax2,
        x="input size [MiB]",
        y="operator_class_long",
        order=[VAR_UU, VAR_OTFDRC, VAR_SPEC, VAR_OTFM],
        data=dfUse,
        ci=None,
    )
    ax2.set_ylabel(None)
    ax2.set_yticklabels([])
    footprintCap = 512
    ax2.set_xlim(right=footprintCap)
    ax2.set_xticks([0, 128, 256, 384, 512])
    ax2.text(footprintCap,
             0,
             "{:.0f} MiB → ".format(
                 dfUse.query("in_data_f == 'uncompr_f'")
                 ["input size [MiB]"].mean()),
             horizontalalignment="right",
             verticalalignment="center",
             color="white",
             fontsize=20)

    sns.despine()
    utils.saveFig("figure4_example")
Code example #14
def evalC1Likelihood(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
            workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
            c1_g='',model_g='mlp',use_log=False,true_dist=False, vars_g=None):

  f = ROOT.TFile('{0}/{1}'.format(dir,workspace))
  w = f.Get('w')
  f.Close()
  
  if true_dist == True:
    vars = ROOT.TList()
    for var in vars_g:
      vars.Add(w.var(var))
    x = ROOT.RooArgSet(vars)
  else:
    x = None

  score = ROOT.RooArgSet(w.var('score'))
  if use_log == True:
    evaluateRatio = test.evaluateLogDecomposedRatio
    post = 'log'
  else:
    evaluateRatio = test.evaluateDecomposedRatio
    post = ''

  npoints = 25
  csarray = np.linspace(0.01,0.10,npoints)
  testdata = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,'mlp',c1_g,'test','F1'))
  decomposedLikelihood = np.zeros(npoints)
  trueLikelihood = np.zeros(npoints)
  c1s = np.zeros(c1.shape[0])
  pre_pdfratios = []
  pre_ratios = []
  for k,c0_ in enumerate(c0):
    pre_pdfratios.append([])
    pre_ratios.append([])
    for j,c1_ in enumerate(c1):
      if k != j:
        f0pdf = w.pdf('bkghistpdf_{0}_{1}'.format(k,j))
        f1pdf = w.pdf('sighistpdf_{0}_{1}'.format(k,j))
        outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g,
        'adaptive',k,j),testdata,model_g=model_g)
        pdfratios = [test.singleRatio(score,f0pdf,f1pdf,[xs]) for xs in outputs]
        pdfratios = np.array(pdfratios)
        pre_pdfratios[k].append(pdfratios)
      else:
        pre_pdfratios[k].append(None)
      if true_dist == True:          
        f0 = w.pdf('f{0}'.format(k))
        f1 = w.pdf('f{0}'.format(j))
        if len(testdata.shape) > 1:
          ratios = np.array([test.singleRatio(x,f0,f1,xs) for xs in testdata])
        else:
          ratios = np.array([test.singleRatio(x,f0,f1,[xs]) for xs in testdata])
        pre_ratios[k].append(ratios) 
  for i,cs in enumerate(csarray):
    c1s[:] = c1[:]
    c1s[0] = cs
    c1s = c1s/c1s.sum()
    decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x,
    plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_ratios=pre_ratios,
    pre_pdfratios=pre_pdfratios)
    if use_log == False:
      decomposedLikelihood[i] = np.log(decomposedRatios).sum()
      trueLikelihood[i] = np.log(trueRatios).sum()
    else:
      decomposedLikelihood[i] = decomposedRatios.sum()
      trueLikelihood[i] = trueRatios.sum()

  decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
  if true_dist == True:
    trueLikelihood = trueLikelihood - trueLikelihood.min() 
    saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood'),labels=['decomposed','true'],axis=['c1[0]','-ln(L)'],marker=True,dir=dir,
        marker_value=c1[0],title='c1[0] Fitting',print_pdf=True)
    return (csarray[trueLikelihood.argmin()], csarray[decomposedLikelihood.argmin()])
  else:
    return (0.,csarray[decomposedLikelihood.argmin()])
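Stripped of the ROOT machinery, the scan in evalC1Likelihood is: evaluate per-event ratios on a grid of coupling values, sum the logs, shift by the minimum, and take the argmin. A toy, self-contained version with an exact Gaussian ratio standing in for the trained one:

import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(0.05, 0.02, 1000)   # toy events; true c1[0] = 0.05

def ratio(x, c):
    """Toy per-event ratio p(x|c) / p(x|c_ref) for Gaussians of width 0.02."""
    return (np.exp(-(x - c) ** 2 / (2 * 0.02 ** 2)) /
            np.exp(-(x - 0.05) ** 2 / (2 * 0.02 ** 2)))

csarray = np.linspace(0.01, 0.10, 25)
nll = np.array([-np.log(ratio(data, c)).sum() for c in csarray])
nll -= nll.min()
print(csarray[nll.argmin()])          # lands at the grid point nearest 0.05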
Code example #15
def computeRatios(workspace,data_file,model_file,dir,model_g,c1_g,true_dist=False,
      vars_g=None):
  '''
    Use the computed score densities to compute
    the ratio test.
  '''

  f = ROOT.TFile('{0}/{1}'.format(dir,workspace))
  w = f.Get('w')
  f.Close()
  

  print 'Calculating ratios'

  npoints = 50

  score = ROOT.RooArgSet(w.var('score'))
  getRatio = singleRatio

  if true_dist == True:
    vars = ROOT.TList()
    for var in vars_g:
      vars.Add(w.var(var))
    x = ROOT.RooArgSet(vars)

  # NN trained on complete model
  F0pdf = w.function('bkghistpdf_F0_F1')
  F1pdf = w.function('sighistpdf_F0_F1')
  data = np.loadtxt('{0}/train_{1}.dat'.format(dir,data_file)) 
  testdata = data[:,:-1]
  testtarget = data[:,-1]

  '''
  # Make ratio considering tumor size unknown
  ts_idx = 2
  target = testdata[0]
  testdata_size = np.array([x for x in testdata if (np.delete(x,ts_idx) == np.delete(target,ts_idx)).all()])
  '''

  if true_dist == True and len(vars_g) == 1:
      xarray = np.linspace(1,10,npoints)
      # TODO: hardcoded dist names
      F1dist = np.array([evalDist(x,w.pdf('f1'),[xs]) for xs in xarray])
      F0dist = np.array([evalDist(x,w.pdf('f0'),[xs]) for xs in xarray])
      trueRatio = getRatio(F1dist, F0dist)

      outputs = predict('{0}/{1}_F0_F1.pkl'.format(dir,model_file),xarray,model_g=model_g)

      F1fulldist = np.array([evalDist(score,F1pdf,[xs]) for xs in outputs])
      F0fulldist = np.array([evalDist(score,F0pdf,[xs]) for xs in outputs])

      completeRatio = getRatio(F0fulldist,F1fulldist)

      saveFig(xarray, [completeRatio, trueRatio], makePlotName('all','train',type='ratio'),title='Density Ratios',labels=['Trained', 'Truth'], print_pdf=True,dir=dir)
  
  outputs = predict('{0}/{1}_F0_F1.pkl'.format(dir,model_file),testdata,model_g=model_g)

  F1fulldist = np.array([evalDist(score,F1pdf,[xs]) for xs in outputs])
  F0fulldist = np.array([evalDist(score,F0pdf,[xs]) for xs in outputs])

  completeRatio = getRatio(F1fulldist,F0fulldist)
  complete_target = testtarget
  #Histogram F0-f0 for composed, full and true

  # Removing outliers
  numtest = completeRatio.shape[0]
  #decomposedRatio[decomposedRatio < 0.] = completeRatio[decomposedRatio < 0.]

  complete_outliers = np.zeros(numtest,dtype=bool)
  complete_outliers = findOutliers(completeRatio)
  complete_target = testtarget[complete_outliers] 
  completeRatio = completeRatio[complete_outliers]

  bins = 70
  low = 0.6
  high = 1.2

  for l,name in enumerate(['sig','bkg']):
    minimum = completeRatio[complete_target == 1-l].min() 
    maximum = completeRatio[complete_target == 1-l].max()

    low = minimum - ((maximum - minimum) / bins)*10
    high = maximum + ((maximum - minimum) / bins)*10
    w.factory('ratio{0}[{1},{2}]'.format(name, low, high))
    ratio_var = w.var('ratio{0}'.format(name))

    numtest = completeRatio.shape[0] 
    hist = ROOT.TH1F('{0}hist_F0_f0'.format(name),'hist',bins,low,high)
    for val in completeRatio[complete_target == 1-l]:
      hist.Fill(val)
    datahist = ROOT.RooDataHist('{0}datahist_F0_f0'.format(name),'hist',
          ROOT.RooArgList(ratio_var),hist)
    ratio_var.setBins(bins)
    histpdf = ROOT.RooHistFunc('{0}histpdf_F0_f0'.format(name),'hist',
          ROOT.RooArgSet(ratio_var), datahist, 0)

    histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator')
    getattr(w,'import')(hist)
    getattr(w,'import')(datahist) # workaround for morph = w.import(morph)
    getattr(w,'import')(histpdf) # workaround for morph = w.import(morph)
    #print '{0} {1} {2}'.format(curr,name,hist.Integral())

    if name == 'bkg':
      all_ratios_plots = [w.function('sighistpdf_F0_f0'),
            w.function('bkghistpdf_F0_f0')]
      all_names_plots = ['sig','bkg']
    
  printFrame(w,['ratiosig','ratiobkg'],all_ratios_plots, makePlotName('ratio','comparison',type='hist',dir=dir,model_g=model_g,c1_g=c1_g),all_names_plots,dir=dir,model_g=model_g,y_text='Count',title='Histograms for ratios',x_text='ratio value',print_pdf=True)

  #completeRatio = np.log(completeRatio)
  completeRatio = completeRatio + np.abs(completeRatio.min())
  ratios_list = completeRatio / completeRatio.max()
  legends_list = ['composed','full']
  makeSigBkg([ratios_list],[complete_target],makePlotName('comp','all',type='sigbkg',dir=dir,model_g=model_g,c1_g=c1_g),dir=dir,model_g=model_g,print_pdf=True,legends=legends_list,title='Signal-Background rejection curves')

  # Perform transfer learning

  data = np.loadtxt('{0}/train_{1}.dat'.format(dir,data_file)) 
  # Transforming f1 into f0
  data_f1 = data[data[:,-1] == 0.]
  data_f0 = data[data[:,-1] == 1.]
  testdata = data_f1[:,:-1]
  testtarget = data_f1[:,-1]

  '''
  # Make ratio considering tumor size unknown
  ts_idx = 2
  target = testdata[0]
  testdata_size = np.array([x for x in testdata if (np.delete(x,ts_idx) == np.delete(target,ts_idx)).all()])
  pdb.set_trace()
  '''

  xarray = testdata

  outputs = predict('{0}/{1}_F0_F1.pkl'.format(dir,model_file),xarray,model_g=model_g)

  F1fulldist = np.array([evalDist(score,F1pdf,[xs]) for xs in outputs])
  F0fulldist = np.array([evalDist(score,F0pdf,[xs]) for xs in outputs])

  completeRatio = getRatio(F0fulldist,F1fulldist)

  if len(vars_g) == 1:
    F1dist = np.array([evalDist(x,w.pdf('f1'),[xs]) for xs in xarray])
    F0dist = np.array([evalDist(x,w.pdf('f0'),[xs]) for xs in xarray])
  else:
    F1dist = np.array([evalDist(x,w.pdf('f1'),xs) for xs in xarray])
    F0dist = np.array([evalDist(x,w.pdf('f0'),xs) for xs in xarray])

  trueRatio = getRatio(F1dist, F0dist)

  trueIndexes = findOutliers(trueRatio)
  completeIndexes = findOutliers(completeRatio)
  #indexes = np.logical_and(trueIndexes,completeIndexes)
  indexes = completeIndexes
  data_f1_red = data_f1
  #trueRatio = trueRatio[indexes]
  #completeRatio = completeRatio[indexes]
  #data_f1_red = data_f1[indexes]


  for f in range(10):
    feature = f
    # Transferring distributions via histogram manipulation
    fig,ax = plt.subplots()
    colors = ['b-','r-','k-']
    colors_rgb = ['blue','red','black']
    
    hist,bins = np.histogram(data_f1[:,feature],bins=20, range=(0.,10.),density=True)


    hist_transfered,bins_1 = np.histogram(data_f1_red[:,feature],weights=trueRatio,bins=20, range=(0.,10.),density=True)
    hist_transfered_clf,bins_2 = np.histogram(data_f1_red[:,feature],bins=20,weights=completeRatio, range=(0.,10.),density=True)
    hist0,bins0 = np.histogram(data_f0[:,feature], bins=20, range=(0.,10.),density=True)

    #hist, bins =  ax.hist(data_f0[:,0],color=colors_rgb[0],label='true',bins=50,histtype='stepfilled',normed=1, alpha=0.5,range=[0,100]) 

    widths = np.diff(bins)
    #hist_transfered = hist*trueRatio
    #hist_transfered_clf = hist*completeRatio

    ax.bar(bins[:-1], hist0,widths,label='f0',alpha=0.5,color='red')
    #ax.bar(bins[:-1], hist_transfered,widths,label='f1 transferred (true)',
    #    alpha=0.5,color='blue')
    ax.bar(bins[:-1], hist_transfered_clf,widths,label='f1 transferred (trained)',
        alpha=0.5,color='green')

    ax.legend(frameon=False,fontsize=11)
    ax.set_xlabel('x') 
    ax.set_ylabel('p(x)') 
    if len(vars_g) > 1:
      ax.set_title('Transferred distributions for feature {0}'.format(feature))
    else:
      ax.set_title('Transferred distributions')
    file_plot =  makePlotName('all','transf',type='hist_v{0}'.format(feature),model_g=model_g) 
    fig.savefig('{0}/plots/{1}/{2}.png'.format(dir,model_g,file_plot))
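The "transfer" plots at the end of this example reweight events drawn from f1 by the per-event ratio so that their histogram matches f0. The same trick in a minimal, self-contained form, with toy 1D Gaussians and an exact density ratio standing in for the trained classifier ratio:

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

rng = np.random.default_rng(1)
x_f1 = rng.normal(4.0, 1.0, 20000)                        # events drawn from f1
w = norm.pdf(x_f1, 6.0, 1.0) / norm.pdf(x_f1, 4.0, 1.0)   # exact ratio f0/f1

fig, ax = plt.subplots()
bins = np.linspace(0., 10., 21)
ax.hist(rng.normal(6.0, 1.0, 20000), bins=bins, density=True,
        alpha=0.5, color='red', label='f0')
ax.hist(x_f1, bins=bins, weights=w, density=True,
        alpha=0.5, color='green', label='f1 transferred (ratio-weighted)')
ax.legend(frameon=False)
ax.set_xlabel('x')
ax.set_ylabel('p(x)')
fig.savefig('transfer_demo.png')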
Code example #16
def evalC1C2Likelihood(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
            workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
            c1_g='',model_g='mlp',use_log=False,true_dist=False,vars_g=None,clf=None,
            verbose_printing=False):

  f = ROOT.TFile('{0}/{1}'.format(dir,workspace))
  w = f.Get('w')
  f.Close()
  if true_dist == True:
    vars = ROOT.TList()
    for var in vars_g:
      vars.Add(w.var(var))
    x = ROOT.RooArgSet(vars)
  else:
    x = None

  score = ROOT.RooArgSet(w.var('score'))
  if use_log == True:
    evaluateRatio = test.evaluateLogDecomposedRatio
    post = 'log'
  else:
    evaluateRatio = test.evaluateDecomposedRatio
    post = ''

  npoints = 25
  csarray = np.linspace(0.01,0.2,npoints)
  cs2array = np.linspace(0.1,0.4,npoints)
  testdata = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,model_g,c1_g,'test','F1'))

  decomposedLikelihood = np.zeros((npoints,npoints))
  trueLikelihood = np.zeros((npoints,npoints))
  c1s = np.zeros(c1.shape[0])
  c0s = np.zeros(c1.shape[0])
  pre_pdf = []
  pre_dist = []
  pre_pdf.extend([[],[]])
  pre_dist.extend([[],[]])
  for k,c0_ in enumerate(c0):
    pre_pdf[0].append([])
    pre_pdf[1].append([])
    pre_dist[0].append([])
    pre_dist[1].append([])
    for j,c1_ in enumerate(c1):
      if k != j:
        f0pdf = w.function('bkghistpdf_{0}_{1}'.format(k,j))
        f1pdf = w.function('sighistpdf_{0}_{1}'.format(k,j))
        outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g,
        'adaptive',k,j),testdata,model_g=model_g,clf=clf)
        f0pdfdist = np.array([test.evalDist(score,f0pdf,[xs]) for xs in outputs])
        f1pdfdist = np.array([test.evalDist(score,f1pdf,[xs]) for xs in outputs])
        pre_pdf[0][k].append(f0pdfdist)
        pre_pdf[1][k].append(f1pdfdist)
      else:
        pre_pdf[0][k].append(None)
        pre_pdf[1][k].append(None)
      if true_dist == True:          
        f0 = w.pdf('f{0}'.format(k))
        f1 = w.pdf('f{0}'.format(j))
        if len(testdata.shape) > 1:
          f0dist = np.array([test.evalDist(x,f0,xs) for xs in testdata])
          f1dist = np.array([test.evalDist(x,f1,xs) for xs in testdata])
        else:
          f0dist = np.array([test.evalDist(x,f0,[xs]) for xs in testdata])
          f1dist = np.array([test.evalDist(x,f1,[xs]) for xs in testdata])
        pre_dist[0][k].append(f0dist) 
        pre_dist[1][k].append(f1dist) 
  
  # Evaluate Likelihood in different c1[0] and c1[1] values
  for i,cs in enumerate(csarray):
    for j, cs2 in enumerate(cs2array):
      c1s[:] = c1[:]
      c1s[0] = cs
      c1s[1] = cs2
      c1s[2] = 1.-cs-cs2
      decomposedRatios,trueRatios = evaluateRatio(w,testdata,
      x=x,plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,
      pre_evaluation=pre_pdf,
      pre_dist=pre_dist)

      if use_log == False:
        decomposedLikelihood[i,j] = np.log(decomposedRatios).sum()
        trueLikelihood[i,j] = np.log(trueRatios).sum()
      else:
        decomposedLikelihood[i,j] = decomposedRatios.sum()
        trueLikelihood[i,j] = trueRatios.sum()

  decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
  X,Y = np.meshgrid(csarray, cs2array)
  decMin = np.unravel_index(decomposedLikelihood.argmin(), decomposedLikelihood.shape)
  min_value = [csarray[decMin[0]],cs2array[decMin[1]]]
  if verbose_printing == True:
      saveFig(X,[Y,decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type='multilikelihood'),labels=['composed','true'],contour=True,marker=True,dir=dir,marker_value=(c1[0],c1[1]),print_pdf=True,min_value=min_value)
  if true_dist == True:
    trueLikelihood = trueLikelihood - trueLikelihood.min() 
    trueMin = np.unravel_index(trueLikelihood.argmin(), trueLikelihood.shape)
    return [[csarray[trueMin[0]],cs2array[trueMin[1]]], [csarray[decMin[0]],cs2array[decMin[1]]]]
  else:
    return [[0.,0.],[csarray[decMin[0]],cs2array[decMin[1]]]]
Code example #17
  def computeRatios(self,true_dist=False, vars_g=None,
      data_file='test',use_log=False):
    '''
      Use the computed score densities to compute
      the decomposed ratio test.
      Set true_dist to True if the workspace has the true distributions to
      make plots; in that case vars_g must also be provided.
      The final result is a histogram of ratios and signal-bkg rejection curves.
    '''

    f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace))
    w = f.Get('w')
    f.Close()

    
    # TODO: These are hardcoded for now
    c1 = self.c1
    c0 = self.c0
    #c1 = np.multiply(c1, self.cross_section)
    c1 = c1/c1.sum()
    c0 = c0/c0.sum()

    print 'Calculating ratios'

    npoints = 50

    if true_dist == True:
      vars = ROOT.TList()
      for var in vars_g:
        vars.Add(w.var(var))
      x = ROOT.RooArgSet(vars)

    if use_log == True:
      evaluateRatio = self.evaluateLogDecomposedRatio
      post = 'log'
    else:
      evaluateRatio = self.evaluateDecomposedRatio
      post = ''

    score = ROOT.RooArgSet(w.var('score'))
    scoref = ROOT.RooArgSet(w.var('scoref'))

    if use_log == True:
      getRatio = self.singleLogRatio
    else:
      getRatio = self.singleRatio
   
    if self.preprocessing == True:
      if self.scaler is None:
        self.scaler = {}
        for k in range(self.nsamples):
          for j in range(self.nsamples):
            if k < j:
              self.scaler[(k,j)] = joblib.load('{0}/model/{1}/{2}/{3}_{4}_{5}.dat'.format(self.dir,'mlp',self.c1_g,'scaler',self.dataset_names[k],self.dataset_names[j]))
            

    # NN trained on complete model
    F0pdf = w.function('bkghistpdf_F0_F1')
    F1pdf = w.function('sighistpdf_F0_F1')

    # TODO: assuming the signal is the first dataset
    testdata, testtarget = loadData(data_file,self.F0_dist,0,dir=self.dir,c1_g=self.c1_g,preprocessing=False) 
    if len(vars_g) == 1:
      xarray = np.linspace(0,5,npoints)
      fullRatios,_ = evaluateRatio(w,xarray,x=x,plotting=True,roc=False,true_dist=True)

      F1dist = np.array([self.evalDist(x,w.pdf('F1'),[xs]) for xs in xarray])
      F0dist = np.array([self.evalDist(x,w.pdf('F0'),[xs]) for xs in xarray])
      y2 = getRatio(F1dist, F0dist)

      # NN trained on complete model
      outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),xarray.reshape(xarray.shape[0],1),model_g=self.model_g,clf=self.clf)
      F1fulldist = np.array([self.evalDist(scoref,F1pdf,[xs]) for xs in outputs])
      F0fulldist = np.array([self.evalDist(scoref,F0pdf,[xs]) for xs in outputs])

      pdfratios = getRatio(F1fulldist, F0fulldist)

      saveFig(xarray, [fullRatios, y2, pdfratios], makePlotName('all','train',type='ratio'+post),title='Likelihood Ratios',labels=['Composed trained', 'True', 'Full trained'],print_pdf=True,dir=self.dir)
      
    if true_dist == True:
      decomposedRatio,_ = evaluateRatio(w,testdata,x=x,plotting=False,roc=self.verbose_printing,true_dist=True)
    else:
      decomposedRatio,_ = evaluateRatio(w,testdata,c0arr=c0,c1arr=c1,plotting=True,
      roc=True,data_type=data_file)
    if len(testdata.shape) > 1:
      outputs = predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),testdata,model_g=self.model_g,clf=self.clf)
      #outputs = predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),testdata,model_g=self.model_g)

    else:
      outputs = predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),testdata.reshape(testdata.shape[0],1),model_g=self.model_g,clf=self.clf)

    F1fulldist = np.array([self.evalDist(scoref,F1pdf,[xs]) for xs in outputs])
    F0fulldist = np.array([self.evalDist(scoref,F0pdf,[xs]) for xs in outputs])

    completeRatio = getRatio(F1fulldist,F0fulldist)
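    # completeRatio comes from the single NN trained on the full mixtures
    # F0 vs F1, while decomposedRatio is composed from the pairwise models;
    # the plots below compare the two (and the true ratio when available)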
    if true_dist == True:
      if len(testdata.shape) > 1:
        F1dist = np.array([self.evalDist(x,w.pdf('F1'),xs) for xs in testdata])
        F0dist = np.array([self.evalDist(x,w.pdf('F0'),xs) for xs in testdata])
      else:
        F1dist = np.array([self.evalDist(x,w.pdf('F1'),[xs]) for xs in testdata])
        F0dist = np.array([self.evalDist(x,w.pdf('F0'),[xs]) for xs in testdata])

      realRatio = getRatio(F1dist,F0dist)

    decomposed_target = testtarget
    complete_target = testtarget
    real_target = testtarget
    #Histogram F0-f0 for composed, full and true

    # Removing outliers
    numtest = decomposedRatio.shape[0] 
    #decomposedRatio[decomposedRatio < 0.] = completeRatio[decomposedRatio < 0.]

    #decomposed_outliers = np.zeros(numtest,dtype=bool)
    #complete_outliers = np.zeros(numtest,dtype=bool)
    #decomposed_outliers = self.findOutliers(decomposedRatio)
    #complete_outliers = self.findOutliers(completeRatio)
    #decomposed_target = testtarget[decomposed_outliers] 
    #complete_target = testtarget[complete_outliers] 
    #decomposedRatio = decomposedRatio[decomposed_outliers]
    #completeRatio = completeRatio[complete_outliers]
    if true_dist == True:
      real_outliers = np.zeros(numtest,dtype=bool)
      real_outliers = self.findOutliers(realRatio)
      #real_target = testtarget[real_outliers] 
      #realRatio = realRatio[real_outliers]

    all_ratios_plots = []
    all_names_plots = []
    bins = 70
    # histogram ranges are computed per class (sig/bkg) from the ratio
    # extrema in the loop below
    low = []
    high = []
    ratios_vars = []
    for l,name in enumerate(['sig','bkg']):
      if true_dist == True:
        ratios_names = ['truth','full','composed']
        ratios_vec = [realRatio, completeRatio, decomposedRatio]
        target_vec = [real_target, complete_target, decomposed_target] 

        minimum = min([realRatio[real_target == 1-l].min(), 
              completeRatio[complete_target == 1-l].min(), 
              decomposedRatio[decomposed_target == 1-l].min()])
        maximum = max([realRatio[real_target == 1-l].max(), 
              completeRatio[complete_target == 1-l].max(), 
              decomposedRatio[decomposed_target == 1-l].max()])

      else:
        ratios_names = ['full','composed']
        ratios_vec = [completeRatio, decomposedRatio]
        target_vec = [complete_target, decomposed_target] 
        minimum = min([completeRatio[complete_target == 1-l].min(), 
              decomposedRatio[decomposed_target == 1-l].min()])
        maximum = max([completeRatio[complete_target == 1-l].max(), 
              decomposedRatio[decomposed_target == 1-l].max()])

      low.append(minimum - ((maximum - minimum) / bins)*10)
      high.append(maximum + ((maximum - minimum) / bins)*10)
      w.factory('ratio{0}[{1},{2}]'.format(name, low[l], high[l]))
      ratios_vars.append(w.var('ratio{0}'.format(name)))
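    # Fill per-class ROOT histograms of each ratio type, wrap them as
    # RooDataHist / RooHistFunc and import them into the workspace so they
    # can be drawn together below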
    for curr, curr_ratios, curr_targets in zip(ratios_names,ratios_vec,target_vec):
      numtest = curr_ratios.shape[0] 
      for l,name in enumerate(['sig','bkg']):
        hist = ROOT.TH1F('{0}_{1}hist_F0_f0'.format(curr,name),'hist',bins,low[l],high[l])
        for val in curr_ratios[curr_targets == 1-l]:
          hist.Fill(val)
        datahist = ROOT.RooDataHist('{0}_{1}datahist_F0_f0'.format(curr,name),'hist',
              ROOT.RooArgList(ratios_vars[l]),hist)
        ratios_vars[l].setBins(bins)
        histpdf = ROOT.RooHistFunc('{0}_{1}histpdf_F0_f0'.format(curr,name),'hist',
              ROOT.RooArgSet(ratios_vars[l]), datahist, 0)

        histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator')
        getattr(w,'import')(hist)
        getattr(w,'import')(datahist) # work around for morph = w.import(morph)
        getattr(w,'import')(histpdf) # work around for morph = w.import(morph)
        #print '{0} {1} {2}'.format(curr,name,hist.Integral())
        if name == 'bkg':
          all_ratios_plots.append([w.function('{0}_sighistpdf_F0_f0'.format(curr)),
                w.function('{0}_bkghistpdf_F0_f0'.format(curr))])
          all_names_plots.append(['sig_{0}'.format(curr),'bkg_{0}'.format(curr)])
        
    all_ratios_plots = [[all_ratios_plots[j][i] for j,_ in enumerate(all_ratios_plots)] 
                for i,_ in enumerate(all_ratios_plots[0])]
    all_names_plots = [[all_names_plots[j][i] for j,_ in enumerate(all_names_plots)] 
                for i,_ in enumerate(all_names_plots[0])]

    printMultiFrame(w,['ratiosig','ratiobkg'],all_ratios_plots, makePlotName('ratio','comparison',type='hist'+post,dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),all_names_plots,setLog=True,dir=self.dir,model_g=self.model_g,y_text='Count',title='Histograms for ratios',x_text='ratio value',print_pdf=True)

    # scatter plot true ratio - composed - full ratio

    #if self.verbose_printing == True and true_dist == True:
    #  saveFig(completeRatio,[realRatio], makePlotName('full','train',type='scat'+post,dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),scatter=True,axis=['full trained ratio','true ratio'],dir=self.dir,model_g=self.model_g)
    #  saveFig(decomposedRatio,[realRatio], makePlotName('comp','train',type='scat'+post,dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['composed trained ratio','true ratio'],dir=self.dir, model_g=self.model_g)
    # signal - bkg rejection plots
    if use_log == True:
      decomposedRatio = np.exp(decomposedRatio)
      completeRatio = np.exp(completeRatio)
      if true_dist == True:
        realRatio = np.exp(realRatio)
    if true_dist == True:

      ratios_list = [decomposedRatio/decomposedRatio.max(), 
                    completeRatio/completeRatio.max(),
                    realRatio/realRatio.max()]
      targets_list = [decomposed_target, complete_target, real_target]
      legends_list = ['composed', 'full', 'true']
    else:
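      # No true distributions available: keep only strictly positive ratios,
      # move to log space, then shift and normalize to [0, 1] before the
      # rejection-curve plot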

      indices = (decomposedRatio > 0.)
      decomposedRatio = decomposedRatio[indices] 
      decomposed_target = decomposed_target[indices]
      indices = (completeRatio > 0.)
      completeRatio = completeRatio[indices]
      complete_target = complete_target[indices]

      completeRatio = np.log(completeRatio)
      decomposedRatio = np.log(decomposedRatio)
      decomposedRatio = decomposedRatio + np.abs(decomposedRatio.min())
      completeRatio = completeRatio + np.abs(completeRatio.min())
      ratios_list = [decomposedRatio/decomposedRatio.max(), 
                    completeRatio/completeRatio.max()]
      targets_list = [decomposed_target, complete_target]
      legends_list = ['composed','full']
    makeSigBkg(ratios_list,targets_list,makePlotName('comp','all',type='sigbkg'+post,dir=self.dir,
          model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,print_pdf=True,legends=legends_list,title='Signal-Background rejection curves')

    # Scatter plot to compare regression function and classifier score
    if self.verbose_printing == True and true_dist == True:
      testdata, testtarget = loadData('test',self.F0_dist,self.F1_dist,dir=self.dir,c1_g=self.c1_g) 
      if len(testdata.shape) > 1:
        reg = np.array([self.__regFunc(x,w.pdf('F0'),w.pdf('F1'),xs) for xs in testdata])
      else:
        reg = np.array([self.__regFunc(x,w.pdf('F0'),w.pdf('F1'),[xs]) for xs in testdata])
      if len(testdata.shape) > 1:
        outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),testdata.reshape(testdata.shape[0],testdata.shape[1]),model_g=self.model_g, clf=self.clf)
      else:
        outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),testdata.reshape(testdata.shape[0],1),model_g=self.model_g, clf=self.clf)
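
The decomposed ratio used throughout these examples rests on a simple identity: the ratio of two mixtures can be written purely in terms of pairwise ratios between their components, which is what the per-pair classifiers estimate. A small numpy sketch of that identity (composed_ratio and pair_ratios are illustrative names, not part of the class above):

import numpy as np

def composed_ratio(c0, c1, pair_ratios):
  # F0(x)/F1(x) = sum_k c0_k f_k(x) / sum_j c1_j f_j(x)
  #             = sum_k c0_k / ( sum_j c1_j * f_j(x)/f_k(x) )
  # pair_ratios[k, j] holds f_j(x)/f_k(x) evaluated on the test points
  total = np.zeros(pair_ratios.shape[2])
  for k in range(len(c0)):
    inner = np.zeros(pair_ratios.shape[2])
    for j in range(len(c1)):
      inner += c1[j] * pair_ratios[k, j]
    total += c0[k] / inner
  return total

# toy check against the direct mixture ratio for three known Gaussians
x = np.linspace(-1., 3., 5)
f = np.array([np.exp(-(x - mu)**2 / 2.) / np.sqrt(2.*np.pi) for mu in (0., 1., 2.)])
pair_ratios = f[None, :, :] / f[:, None, :]   # [k, j] = f_j / f_k
c0 = np.array([0.2, 0.5, 0.3])
c1 = np.array([0.3, 0.3, 0.4])
print np.allclose(composed_ratio(c0, c1, pair_ratios), c0.dot(f) / c1.dot(f))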
Code Example #18
  def evalC1Likelihood(self,w,testdata,c0,c1,c_eval=0,c_min=0.01,c_max=0.2,use_log=False,true_dist=False, vars_g=None, npoints=50,samples_ids=None,weights_func=None,coef_index=0):

    if true_dist == True:
      vars = ROOT.TList()
      for var in vars_g:
        vars.Add(w.var(var))
      x = ROOT.RooArgSet(vars)
    else:
      x = None

    score = ROOT.RooArgSet(w.var('score'))
    if use_log == True:
      evaluateRatio = self.evaluateLogDecomposedRatio
      post = 'log'
    else:
      evaluateRatio = self.evaluateDecomposedRatio
      post = ''

    csarray = np.linspace(c_min,c_max,npoints)
    decomposedLikelihood = np.zeros(npoints)
    trueLikelihood = np.zeros(npoints)
    c1s = np.zeros(c0.shape[0])
    pre_pdf = []
    pre_dist = []
    pre_pdf.extend([[],[]])
    pre_dist.extend([[],[]])
    # Precompute, for every ordered pair (k, j), the per-pair densities so
    # that the coefficient scan below only has to reweight them
    for k in range(self.nsamples):
      pre_pdf[0].append([])
      pre_pdf[1].append([])
      pre_dist[0].append([])
      pre_dist[1].append([])
      for j in range(self.nsamples):
        index_k,index_j = (self.basis_indexes[k],self.basis_indexes[j])
        if k != j:
          f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j))
          f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j))
          data = testdata
          if self.preprocessing == True:
            data = preProcessing(testdata,self.dataset_names[min(index_k,index_j)],
                self.dataset_names[max(index_k,index_j)],self.scaler)
          outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,
              self.c1_g,self.model_file,index_k,index_j),data,model_g=self.model_g, clf=self.clf)
          f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
          f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
          pre_pdf[0][k].append(f0pdfdist)
          pre_pdf[1][k].append(f1pdfdist)
        else:
          pre_pdf[0][k].append(None)
          pre_pdf[1][k].append(None)
        if true_dist == True:
          f0 = w.pdf('f{0}'.format(index_k))
          f1 = w.pdf('f{0}'.format(index_j))
          if len(testdata.shape) > 1:
            f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata])
            f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata])
          else:
            f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata])
            f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata])
          pre_dist[0][k].append(f0dist)
          pre_dist[1][k].append(f1dist)
    indices = np.ones(testdata.shape[0], dtype=bool)
    ratiosList = []
    trueRatiosList = []
    samples = []
    # This is needed for calibration of full ratios
    #for i,sample in enumerate(self.dataset_names):
    #  samples.append(np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(self.dir,'mlp',self.c1_g,'data',sample)))

    #cross_section = self.cross_section / np.sum(self.cross_section)
    n_eff_ratio = np.zeros(csarray.shape[0])
    n_zeros = np.zeros(csarray.shape[0])
    cross_section = None
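    # Scan the coefficient: for each cs build the weight vector c1s (via
    # weights_func, or by replacing entry c_eval), optionally fold in the
    # cross sections, then evaluate the decomposed ratios using the cached
    # per-pair densities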
    for i,cs in enumerate(csarray):
      if weights_func is not None:
        c1s = weights_func(cs,c1[1]) if coef_index == 0 else weights_func(c1[0],cs)
        print '{0} {1}'.format(cs, c1[1]) if coef_index == 0 else '{0} {1}'.format(c1[0],cs)
        print c1s
      else:
        c1s[:] = c1[:]
        c1s[c_eval] = cs
      if self.cross_section is not None:
        c1s = np.multiply(c1s,self.cross_section)
        #c1s = np.abs(c1s)
      n_eff = c1s.sum()
      n_tot = np.abs(c1s).sum()
      print 'n_eff: {0}, n_tot: {1}, n_eff/n_tot: {2}'.format(n_eff, n_tot, n_eff/n_tot)
      c1s = c1s/c1s.sum()
      decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x,
          plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_dist=pre_dist,
          pre_evaluation=pre_pdf,cross_section=cross_section)
      decomposedRatios = 1./decomposedRatios
      n_eff_ratio[i] = n_eff/n_tot
      n_zeros[i] = decomposedRatios[decomposedRatios < 0.].shape[0]
      print decomposedRatios[decomposedRatios < 0.].shape 
      #calibratedRatios = self.calibrateFullRatios(w, decomposedRatios,
      #    c0,c1s,debug=debug,samples_data=samples,index=i) 
      #saveFig(decomposedRatios2, [calibratedRatios], makePlotName('calibrated_{0}'.format(i),'ratio',type='scat',
      #dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['composed ratio', 
      #'composed calibrated'], dir=self.dir, model_g=self.model_g)
      ratiosList.append(decomposedRatios)
      trueRatiosList.append(trueRatios)
      #indices = np.logical_and(indices, decomposedRatios > 0.)
    for i,cs in enumerate(csarray):
      decomposedRatios = ratiosList[i]
      trueRatios = trueRatiosList[i]
      if use_log == False:
        if samples_ids is not None:
          ratios = decomposedRatios
          ids = samples_ids
          decomposedLikelihood[i] = (np.dot(np.log(ratios),
              np.array([c1[x] for x in ids]))).sum()
        else:
          decomposedRatios[decomposedRatios < 0.] = 1.0
          decomposedLikelihood[i] = -np.log(decomposedRatios).sum()
          print decomposedLikelihood[i]
          
        trueLikelihood[i] = -np.log(trueRatios).sum()
      else:
        decomposedLikelihood[i] = decomposedRatios.sum()
        trueLikelihood[i] = trueRatios.sum()
    decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
    # print n_eff/n_zero relation
    #saveFig(csarray,[n_eff_ratio, n_zeros/n_zeros.max()],makePlotName('eff_ratio','zeros',type=post+'plot_g2'),labels=['n_eff/n_tot','zeros/{0}'.format(n_zeros.max())],axis=['g2','values'],marker=True,dir=self.dir,marker_value=c1[0],title='#zeros and n_eff/n_tot given g2',print_pdf=True,model_g=self.model_g)
    #saveFig(n_eff_ratio, [n_zeros/n_zeros.max()], makePlotName('eff_ratio','zeros',type='scat',
    #dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['n_eff/n_tot', 
    #'#zeros/{0}'.format(n_zeros.max())], dir=self.dir, model_g=self.model_g,title='# zeros given n_eff/n_tot ratio')

    if true_dist == True:
      trueLikelihood = trueLikelihood - trueLikelihood.min()
      saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood_{0}'.format(c_eval)),labels=['decomposed','true'],axis=['c1[{0}]'.format(c_eval),'-ln(L)'],marker=True,dir=self.dir,marker_value=c1[c_eval],title='c1[{0}] Fitting'.format(c_eval),print_pdf=True)
      return (csarray[trueLikelihood.argmin()], csarray[decomposedLikelihood.argmin()])
    else:
      saveFig(csarray,[decomposedLikelihood],makePlotName('comp','train',type='likelihood_g2'),labels=['decomposed'],axis=['g2','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[c_eval],title='g2 Fitting',print_pdf=True,model_g=self.model_g)
      return (0.,csarray[decomposedLikelihood.argmin()])
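
The n_eff/n_tot printout above is a cancellation diagnostic: once the morphing weights are multiplied by cross sections they can turn negative, and the ratio sum(c)/sum(|c|) measures how much cancellation the composed likelihood has to absorb (1.0 means all weights are positive). A tiny sketch of the quantity, with eff_fraction as an illustrative name:

import numpy as np

def eff_fraction(weights):
  # n_eff / n_tot as printed in evalC1Likelihood
  return weights.sum() / np.abs(weights).sum()

print eff_fraction(np.array([0.5, 0.3, 0.2]))   # 1.0, no negative weights
print eff_fraction(np.array([0.9, -0.4, 0.5]))  # ~0.56, strong cancellations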