def MakeConf(Verbose=True):
    """Build and parse one BDT training config per requested ECAL region.

    The command line is read with argparse:

    * ``-i`` / ``--inputrootfile``: path to the input root file (mandatory).
    * ``--region``: one or more of ``EB`` / ``EE`` (default: both).
    * ``-n`` / ``--name``: optional string appended to the config name.
    * ``--fast``: use the alternative ("fast") set of BDT options.

    For every selected region a ``Config`` object is filled with the BDT
    options, the target, the cuts and the input variables, and
    ``config.Parse()`` is called on it.

    Args:
        Verbose: When true, print the config name and the EE/EB variable
            lists for every config that is generated.

    Returns:
        List with the ``Config`` objects that were parsed, one per region,
        in the order in which the regions were given.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--inputrootfile',
        '-i',
        type=str,
        # There is no default, so the option must be mandatory: otherwise
        # os.path.abspath() below receives None and fails with a traceback.
        required=True,
        help='Path to root file',
        # Inputs that were used as default in the past:
        #   ../applyRegression/Config_Sep30_electron_EB_ECALonly_appliedRegression_training.root
        #   ../Config_Oct25_electron_EB_ECALonly_appliedRegression_training_ptWeight.root
    )
    parser.add_argument('--region',
                        metavar='N',
                        type=str,
                        nargs='+',
                        help='Specify regions',
                        default=['EB', 'EE'],
                        choices=['EE', 'EB'])
    parser.add_argument(
        '-n',
        '--name',
        type=str,
        default='NONE',
        help='Append a string at the end of the name of this config')
    parser.add_argument(
        '--fast',
        action='store_true',
        help='Change some BDT options to be faster (but maybe less precise)')
    args = parser.parse_args()

    datestr = strftime('%b%d')

    # Photon does not have TRK vars
    particle = 'electron'

    # Reads off the name of this .py file, so it's clear what made this.
    moduleName = os.path.basename(__file__).replace('MakeConf_',
                                                    '').replace('.py', '')

    return_configs = []
    for region in args.region:

        # Instantiate the Config class which prints a .config file
        config = Config()

        config.Name = '_'.join(
            ['Config', datestr, particle, region, moduleName])

        # Append a string to the name if given by the user
        # ('NONE' is the placeholder default of --name)
        if args.name != 'NONE':
            config.Name += '_' + args.name

        if args.fast:
            config.Name += '_FastOptions'

        config.InputFiles = os.path.abspath(args.inputrootfile)

        # config.Tree       = 'een_analyzer/{0}Tree'.format( particle.capitalize() )
        config.Tree = 'een_analyzer/correction'  # <-- May want to change this some time.

        ########################################
        # BDT settings
        ########################################

        if args.fast:
            config.Options = [
                "MinEvents=300",  # Up from 200 in the default options
                "Shrinkage=0.2",  # Up from 0.1 in the default options
                "NTrees=1000",
                "MinSignificance=5.0",  # Same as in the default options
                "EventWeight=1",
            ]
        else:
            config.Options = [
                "MinEvents=200",
                "Shrinkage=0.1",
                "NTrees=1000",
                "MinSignificance=5.0",
                "EventWeight=1",
            ]

        # Previous target:
        # config.Target           = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
        config.Target = "(genEnergy * (trkMomentum*trkMomentum*trkMomentumRelError*trkMomentumRelError + (scRawEnergy+scPreshowerEnergy)*(scRawEnergy+scPreshowerEnergy)*resolution*resolution) / ( (scRawEnergy+scPreshowerEnergy)*response*trkMomentum*trkMomentum*trkMomentumRelError*trkMomentumRelError + trkMomentum*(scRawEnergy+scPreshowerEnergy)*(scRawEnergy+scPreshowerEnergy)*resolution*resolution ))"

        # Probably neither of these are necessary
        config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
        config.HistoConfig = "jobs/dummy_Histo.config"

        config.CutEB = "scIsEB"
        config.CutEE = "!scIsEB"

        # The flag is stored as a string, not as a bool
        config.DoEB = "True" if region == 'EB' else "False"

        # Earlier versions divided the sample by eventNumber (one part for
        # the ECAL-only training, one for the combination, one for the
        # error) and/or limited the number of events with a cut on NtupID.
        # Now the pre-selection variable is used for all three cuts.

        # Pre-selected events have this variable set to 1.0
        config.CutBase = "(ptWeightCut)"
        config.CutComb = "(ptWeightCut)"
        config.CutError = "(ptWeightCut)"

        ########################################
        # Order tree branches
        ########################################

        common_vars = [
            "(scRawEnergy+scPreshowerEnergy)*response",
            "resolution/response",
            "trkMomentumRelError",
            "trkMomentum/((scRawEnergy+scPreshowerEnergy)*response)",
            "eleEcalDriven",
            "fbrem",
            "gsfchi2",
            "gsfndof",
            "trkEta",
            "trkPhi",
        ]

        # No region-specific variables; each attribute gets its own copy
        config.VariablesEB = list(common_vars)
        config.VariablesEE = list(common_vars)

        if Verbose:
            # Single-argument print(...) works on both Python 2 and 3
            print('\n' + '-' * 70)
            print('Making config file ' + config.Name + '.config')
            print('  Using the following branches for EE:')
            print('    ' + '\n    '.join(config.VariablesEE))
            print('  Using the following branches for EB:')
            print('    ' + '\n    '.join(config.VariablesEB))

        config.DoCombine = "False"
        config.DoErrors = "False"

        ########################################
        # Output
        ########################################

        config.Parse()
        return_configs.append(config)

    return return_configs
def main():
    """Generate the electron and photon training configs (new and old variables).

    One ``Config`` object is filled for the low-pt electrons and is then
    modified in place and parsed again for every further variant:

    * new variables: electron lowpt, electron fullpt, photon lowpt,
      photon fullpt;
    * old variables (names ending in ``_OLDVARS``): the same four variants
      with a different event weight and older variable lists.

    Because the same object is reused, every attribute that is not
    explicitly reassigned keeps the value of the previous variant.  At the
    end the branches of the low-pt root file are read as a check.

    Raises:
        KeyError: If the ``CMSSW_BASE`` environment variable is not set.
    """

    # Small testing samples -- do NOT use these for plots!
    # fullpt_root_file = 'Ntup_Jun22_fullpt_testing_sample.root'
    # lowpt_root_file  = 'Ntup_Jun22_lowpt_testing_sample.root'

    # Low + high pt sample
    fullpt_root_file = 'Ntup_Jun22_fullpt_training.root'

    # Only low pt sample
    lowpt_root_file = 'Ntup_Jun22_lowpt_training.root'

    ntup_path = os.path.join(os.environ['CMSSW_BASE'], 'src/NTuples')
    datestr = strftime('%b%d')

    if not os.path.isdir(ntup_path):
        # Reported only; the script continues with this path regardless
        print('Error: "{0}" is not a directory'.format(ntup_path))

    def physical_path(input_root_file):
        """Return the path of ``input_root_file`` inside ``ntup_path``."""
        return os.path.join(ntup_path, input_root_file)

    ########################################
    # BASE CONFIG - This is low pt electrons
    #   Configs for photons and other pt ranges are created by altering this one
    ########################################

    # Instantiate the Config class which prints a .config file
    base_config = Config()

    base_config.Name = 'Config_electron_lowpt_' + datestr

    base_config.InputFiles = physical_path(lowpt_root_file)
    base_config.Tree = 'een_analyzer/ElectronTree'

    ########################################
    # BDT settings
    ########################################

    base_config.Options = [
        "MinEvents=200",
        "Shrinkage=0.1",
        "NTrees=1000",
        "MinSignificance=5.0",
        "EventWeight=max( min(1,exp(-(genPt-50)/50)), 0.1 )",  # <-- What to do?
    ]

    base_config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
    base_config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
    base_config.HistoConfig = "jobs/dummy_Histo.config"

    base_config.CutBase = "eventNumber%2==0"
    base_config.CutEB = "scIsEB"
    base_config.CutEE = "!scIsEB"
    base_config.CutError = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4==3)"

    # An additional cut can be added so that the regression is fast:
    # NtupIDcut = 10000
    # base_config.CutBase  += ' && (NtupID<={0})'.format( NtupIDcut )
    # base_config.CutError += ' && (NtupID<={0})'.format( NtupIDcut )
    # base_config.CutComb  += ' && (NtupID<={0})'.format( NtupIDcut )

    ########################################
    # Order tree branches
    ########################################

    common_vars = [

        # ======================================
        # Common variables

        'pt',
        # 'nVtx',          # rho should be enough information for the BDT
        'scRawEnergy',
        # 'scEta',         # Requires alignment information; use crystal number of the seed instead
        # 'scPhi',         # Requires alignment information; use crystal number of the seed instead
        'scEtaWidth',
        'scPhiWidth',
        'scSeedRawEnergy/scRawEnergy',
        'hadronicOverEm',
        'rhoValue',
        'delEtaSeed',
        'delPhiSeed',

        # ======================================
        # Showershape variables
        #
        # The plain versions (r9, eHorizontal, eVertical, sigmaIetaIeta,
        # sigmaIetaIphi, sigmaIphiIphi, e5x5, e3x3, eMax, e2nd, eTop,
        # eBottom, eLeft, eRight, e2x5Max/Left/Right/Top/Bottom) are not
        # used; the full 5x5 versions are used instead.
        #
        # Normalization to scRawEnergy necessary?

        'full5x5_r9',
        'full5x5_eHorizontal',
        'full5x5_eVertical',
        'full5x5_sigmaIetaIeta',
        'full5x5_sigmaIetaIphi',
        'full5x5_sigmaIphiIphi',
        'full5x5_e5x5',
        'full5x5_e3x3',
        'full5x5_eMax',
        'full5x5_e2nd',
        'full5x5_eTop',
        'full5x5_eBottom',
        'full5x5_eLeft',
        'full5x5_eRight',
        'full5x5_e2x5Max',
        'full5x5_e2x5Left',
        'full5x5_e2x5Right',
        'full5x5_e2x5Top',
        'full5x5_e2x5Bottom',

        # ======================================
        # Saturation variables

        'N_SATURATEDXTALS',
        'seedIsSaturated',
        'seedCrystalEnergy/scRawEnergy',

        # ======================================
        # Cluster variables

        'N_ECALClusters',
        'clusterMaxDR',
        'clusterMaxDRDPhi',
        'clusterMaxDRDEta',
        'clusterMaxDRRawEnergy',

        'clusterRawEnergy[0]/scRawEnergy',
        'clusterRawEnergy[1]/scRawEnergy',
        'clusterRawEnergy[2]/scRawEnergy',
        'clusterDPhiToSeed[0]',
        'clusterDPhiToSeed[1]',
        'clusterDPhiToSeed[2]',
        'clusterDEtaToSeed[0]',
        'clusterDEtaToSeed[1]',
        'clusterDEtaToSeed[2]',
    ]

    base_config.VariablesEB = common_vars + [
        # 'cryEtaCoordinate',  # Requires alignment information; use crystal number of the seed instead
        # 'cryPhiCoordinate',  # Requires alignment information; use crystal number of the seed instead
        'iEtaCoordinate',
        'iPhiCoordinate',
        'iEtaMod5',
        'iPhiMod2',
        'iEtaMod20',
        'iPhiMod20',
    ]

    base_config.VariablesEE = common_vars + [
        # 'cryXCoordinate',  # Requires alignment information; use crystal number of the seed instead
        # 'cryYCoordinate',  # Requires alignment information; use crystal number of the seed instead
        'iXCoordinate',
        'iYCoordinate',
        'scPreshowerEnergy/scRawEnergy',
        'preshowerEnergyPlane1/scRawEnergy',
        'preshowerEnergyPlane2/scRawEnergy',
    ]

    ########################################
    # Ep combination
    ########################################

    # Only do the combination for the electron
    base_config.DoCombine = "True"

    base_config.TargetComb = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
    base_config.CutComb = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4!=3)"

    base_config.VariablesComb = [
        '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
        'BDTerror/BDTresponse',
        'trkMomentum',
        'trkMomentumRelError',
        'BDTerror/BDTresponse/trkMomentumRelError',
        '( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum',
        ('( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum  *' +
         'sqrt( BDTerror/BDTresponse*BDTerror/BDTresponse + trkMomentumRelError*trkMomentumRelError)'),
        'eleEcalDriven',
        'eleTrackerDriven',
        'eleClass',
        'scIsEB',
    ]

    ########################################
    # Output
    ########################################

    # lowpt electrons - this is simply the base config defined above
    base_config.Parse()

    # fullpt electrons - only change the root file
    base_config.Name = 'Config_electron_fullpt_' + datestr
    base_config.InputFiles = physical_path(fullpt_root_file)
    base_config.Parse()

    # lowpt photons - other tree, and no Ep combination
    base_config.Name = 'Config_photon_lowpt_' + datestr
    base_config.InputFiles = physical_path(lowpt_root_file)
    base_config.Tree = 'een_analyzer/PhotonTree'
    base_config.DoCombine = "False"
    base_config.Parse()

    # fullpt photons - Tree and DoCombine keep the lowpt photon values
    base_config.Name = 'Config_photon_fullpt_' + datestr
    base_config.InputFiles = physical_path(fullpt_root_file)
    base_config.Parse()

    ########################################
    # OLD VARIABLES
    ########################################

    # Remove the max( ..., 0.1, ) from the eventweight
    base_config.Options = [
        "MinEvents=200",
        "Shrinkage=0.1",
        "NTrees=1000",
        "MinSignificance=5.0",
        "EventWeight=min(1,exp(-(genPt-50)/50))",
    ]

    # lowpt electrons - switch back to the electron tree and combination
    base_config.Name = 'Config_electron_lowpt_' + datestr + '_OLDVARS'
    base_config.InputFiles = physical_path(lowpt_root_file)
    base_config.Tree = 'een_analyzer/ElectronTree'
    base_config.DoCombine = "True"

    OLD_common_electron_vars = [
        'nVtx',
        'scRawEnergy',
        'scEta',
        'scPhi',
        'scEtaWidth',
        'scPhiWidth',
        'r9',
        'scSeedRawEnergy/scRawEnergy',
        'eMax',
        'e2nd',
        'eHorizontal',  # 'scSeedLeftRightAsym',
        'eVertical',    # 'scSeedTopBottomAsym',
        'sigmaIetaIeta',
        'sigmaIetaIphi',
        'sigmaIphiIphi',
        'N_ECALClusters',
        'clusterMaxDR',
        'clusterMaxDRDPhi',
        'clusterMaxDRDEta',
        'clusterMaxDRRawEnergy/scRawEnergy',

        'clusterRawEnergy[0]/scRawEnergy',
        'clusterRawEnergy[1]/scRawEnergy',
        'clusterRawEnergy[2]/scRawEnergy',
        'clusterDPhiToSeed[0]',
        'clusterDPhiToSeed[1]',
        'clusterDPhiToSeed[2]',
        'clusterDEtaToSeed[0]',
        'clusterDEtaToSeed[1]',
        'clusterDEtaToSeed[2]',
    ]

    base_config.VariablesEB = OLD_common_electron_vars + [
        'cryEtaCoordinate',
        'cryPhiCoordinate',
        'iEtaCoordinate',
        'iPhiCoordinate',
        # 'scSeedCryEta',
        # 'scSeedCryPhi',
        # 'scSeedCryIetaV2',
        # 'scSeedCryIphiV2',
    ]

    base_config.VariablesEE = OLD_common_electron_vars + [
        'scPreshowerEnergy/scRawEnergy',
        # 'scSeedCryIxV2',
        # 'scSeedCryIyV2',
        'iXCoordinate',
        'iYCoordinate',
    ]

    base_config.Parse()

    # fullpt oldvars
    base_config.Name = 'Config_electron_fullpt_' + datestr + '_OLDVARS'
    base_config.InputFiles = physical_path(fullpt_root_file)
    base_config.Parse()

    # lowpt photons
    base_config.Name = 'Config_photon_lowpt_' + datestr + '_OLDVARS'
    base_config.InputFiles = physical_path(lowpt_root_file)
    base_config.Tree = 'een_analyzer/PhotonTree'
    base_config.DoCombine = "False"

    OLD_common_photon_vars = [
        'nVtx',
        'scRawEnergy',
        # 'scEta',
        # 'scPhi',
        'scEtaWidth',
        'scPhiWidth',
        'r9',
        'scSeedRawEnergy/scRawEnergy',
        # 'scSeedLeftRightAsym',
        # 'scSeedTopBottomAsym',
        'sigmaIetaIeta',
        'sigmaIetaIphi',
        'sigmaIphiIphi',
        'N_ECALClusters',

        'hadronicOverEm',
        'rhoValue',
        'delEtaSeed',
        'delPhiSeed',

        'e3x3/e5x5',
        'eMax/e5x5',
        'e2nd/e5x5',
        'eTop/e5x5',
        'eBottom/e5x5',
        'eLeft/e5x5',
        'eRight/e5x5',
        'e2x5Max/e5x5',
        'e2x5Left/e5x5',
        'e2x5Right/e5x5',
        'e2x5Top/e5x5',
        'e2x5Bottom/e5x5',
    ]

    base_config.VariablesEB = OLD_common_photon_vars + [
        'e5x5/scSeedRawEnergy',
        'iEtaCoordinate',
        'iPhiCoordinate',
        'iEtaMod5',
        'iPhiMod2',
        'iEtaMod20',
        'iPhiMod20',
    ]

    base_config.VariablesEE = OLD_common_photon_vars + [
        'scPreshowerEnergy/scRawEnergy',
        'preshowerEnergyPlane1/scRawEnergy',
        'preshowerEnergyPlane2/scRawEnergy',
        'iXCoordinate',
        'iYCoordinate',
    ]

    base_config.Parse()

    # fullpt photons
    base_config.Name = 'Config_photon_fullpt_' + datestr + '_OLDVARS'
    base_config.InputFiles = physical_path(fullpt_root_file)
    base_config.Parse()

    # Print all branches as a check.  base_config.Tree is the photon tree
    # at this point, since the photon configs were the last ones made.
    print("\nAll branches in lowpt root file:")
    Read_branches_from_rootfile(physical_path(lowpt_root_file), base_config.Tree)
def main():
    """Write the photon training config and read it back with ROOT.TEnv.

    A ``Config`` object named ``photonConfig`` is filled with the BDT
    options, target, cuts and the EE/EB variable lists for the photon
    tree.  The variable lists and the branches of the input root file are
    printed, the config is parsed to ``photon_config.config``, and that
    file is then read with ``ROOT.TEnv`` and printed as a check.
    """

    # Instantiate the Config class which prints a .config file
    config = Config()

    config.Name = "photonConfig"

    # filename of the input root file; inputs used earlier:
    #   FlatNtupFull_13May_SepTrees.root
    #   FlatNtupFull_18May_DoubleElectron.root
    #   Ntup_20May_DoubleElectron.root
    #   Ntup_30May_DoublePhoton_somefailed.root
    #   Ntup_01June_DoublePhoton.root
    #   Ntup_05June_photons_LowHighPt.root
    root_file = 'Ntup_12June_photons_lowhighpt.root'

    ntup_path = os.path.abspath('../../NTuples/')

    # For iterating:
    #root_file = 'output.root'
    #ntup_path = '/afs/cern.ch/work/t/tklijnsm/EGM/CMSSW_8_0_4/src/SimpleFlatTreeProducer/SimpleNtuplizer/cfgs/'

    if not os.path.isdir(ntup_path):
        # Reported only; the script continues with this path regardless
        print('Error: "{0}" is not a directory'.format(ntup_path))

    def physical_path(input_root_file):
        """Return the path of ``input_root_file`` inside ``ntup_path``."""
        return os.path.join(ntup_path, input_root_file)

    ########################################
    # BDT settings
    ########################################

    config.InputFiles = physical_path(root_file)

    # Don't run the Ep combination for photons
    config.DoCombine = "False"

    config.Options = [
        "MinEvents=200",
        "Shrinkage=0.1",
        "NTrees=1000",
        "MinSignificance=5.0",
        "EventWeight=min(1,exp(-(genPt-50)/50))",
    ]

    config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
    config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
    #config.TargetComb       = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
    config.HistoConfig = "jobs/dummy_Histo.config"

    config.CutBase = "eventNumber%2==0 && genPt<2000"
    config.CutEB = "scIsEB"
    config.CutEE = "!scIsEB"
    config.CutError = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4==3)"
    #config.CutComb          = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4!=3)"

    # # Add an additional cut so that the regression is fast
    # NtupIDcut = 200
    # config.CutBase  += ' && (NtupID<={0})'.format( NtupIDcut )
    # config.CutError += ' && (NtupID<={0})'.format( NtupIDcut )
    # config.CutComb  += ' && (NtupID<={0})'.format( NtupIDcut )

    ########################################
    # Order photon tree branches
    ########################################

    # Tree inside the input root file from which the branches are read
    #tree_gDirectory = 'een_analyzer/ElectronTree'
    tree_gDirectory = 'een_analyzer/PhotonTree'

    # ---------------------
    # Branches shared by EE and EB (order is important)
    # ---------------------

    common_vars = [

        # Same as in electron case
        'nVtx',
        'scRawEnergy',
        # 'scEta',
        # 'scPhi',
        'scEtaWidth',
        'scPhiWidth',
        'scSeedR9',
        'scSeedRawEnergy/scRawEnergy',
        # 'scSeedLeftRightAsym',
        # 'scSeedTopBottomAsym',
        'scSeedSigmaIetaIeta',
        'scSeedSigmaIetaIphi',
        'scSeedSigmaIphiIphi',
        'N_ECALClusters',

        # Cluster variables of the electron case that are not used here:
        # 'clusterMaxDR',
        # 'clusterMaxDRDPhi',
        # 'clusterMaxDRDEta',
        # 'clusterMaxDRRawEnergy/scRawEnergy',
        # 'clusterRawEnergy[0]/scRawEnergy',
        # 'clusterRawEnergy[1]/scRawEnergy',
        # 'clusterRawEnergy[2]/scRawEnergy',
        # 'clusterDPhiToSeed[0]',
        # 'clusterDPhiToSeed[1]',
        # 'clusterDPhiToSeed[2]',
        # 'clusterDEtaToSeed[0]',
        # 'clusterDEtaToSeed[1]',
        # 'clusterDEtaToSeed[2]',

        # Only for photons
        'hadronicOverEm',
        'rhoValue',
        'delEtaSeed',
        'delPhiSeed',
        'e3x3/e5x5',
        'eMax/e5x5',
        'e2nd/e5x5',
        'eTop/e5x5',
        'eBottom/e5x5',
        'eLeft/e5x5',
        'eRight/e5x5',
        'e2x5Max/e5x5',
        'e2x5Left/e5x5',
        'e2x5Right/e5x5',
        'e2x5Top/e5x5',
        'e2x5Bottom/e5x5',
    ]

    EE_vars = common_vars + [
        'scPreshowerEnergy/scRawEnergy',
        'preshowerEnergyPlane1/scRawEnergy',
        'preshowerEnergyPlane2/scRawEnergy',
        'iXCoordinate',
        'iYCoordinate',
    ]

    EB_vars = common_vars + [
        'e5x5/scSeedRawEnergy',
        'iEtaCoordinate',
        'iPhiCoordinate',
        'iEtaMod5',
        'iPhiMod2',
        'iEtaMod20',
        'iPhiMod20',
    ]

    # Single-argument print(...) works on both Python 2 and 3
    print('Using the following branches for EE:')
    print('    ' + '\n    '.join(EE_vars))
    print('Using the following branches for EB:')
    print('    ' + '\n    '.join(EB_vars))

    # Write to class
    config.Tree = tree_gDirectory
    config.VariablesEE = EE_vars
    config.VariablesEB = EB_vars

    print("\nAll branches in root file:")
    Read_branches_from_rootfile(physical_path(root_file), tree_gDirectory)

    ########################################
    # Output config file
    ########################################

    out_filename = 'photon_config.config'
    config.Parse(out_filename)

    # Test if the config file can be read by ROOT TEnv
    print('\nReading in {0} and trying ROOT.TEnv( ..., 0 ):'.format(
        out_filename))
    I_TEnv = ROOT.TEnv()
    I_TEnv.ReadFile(out_filename, 0)
    I_TEnv.Print()
    print('Exited normally')
    print('=' * 70)
    print('')  # empty line, as the bare Python 2 print statement gave
def main():
    """Write the full-pt photon OLDVARS training config and read it back.

    A ``Config`` object is filled with the BDT options, target, cuts and
    the EE/EB variable lists for the photon tree.  The variable lists and
    the branches of the input root file are printed, the config is parsed
    to ``Config_24Jun_photon_fullpt_OLDVARS.config``, and that file is
    then read with ``ROOT.TEnv`` and printed as a check.
    """

    # Instantiate the Config class which prints a .config file
    config = Config()

    # filename of the input root file; inputs used earlier:
    #   FlatNtupFull_13May_SepTrees.root
    #   FlatNtupFull_18May_DoubleElectron.root
    #   Ntup_20May_DoubleElectron.root
    #   Ntup_30May_DoublePhoton_somefailed.root
    #   Ntup_01June_DoublePhoton.root
    #   Ntup_05June_photons_LowHighPt.root
    #   Ntup_12June_photons_lowhighpt.root

    # root_file = 'Ntup_Jun22_lowpt_training.root'
    root_file = 'Ntup_Jun22_fullpt_training.root'

    # out_filename = 'Config_24Jun_photon_lowpt_OLDVARS.config'
    out_filename = 'Config_24Jun_photon_fullpt_OLDVARS.config'

    # The config is named after its output file (extension removed)
    config.Name = out_filename.replace('.config', '')

    ntup_path = os.path.abspath('../../NTuples/')

    # For iterating:
    #root_file = 'output.root'
    #ntup_path = '/afs/cern.ch/work/t/tklijnsm/EGM/CMSSW_8_0_4/src/SimpleFlatTreeProducer/SimpleNtuplizer/cfgs/'

    if not os.path.isdir(ntup_path):
        # Reported only; the script continues with this path regardless
        print('Error: "{0}" is not a directory'.format(ntup_path))

    def physical_path(input_root_file):
        """Return the path of ``input_root_file`` inside ``ntup_path``."""
        return os.path.join(ntup_path, input_root_file)

    ########################################
    # BDT settings
    ########################################

    config.InputFiles = physical_path(root_file)

    # Don't run the Ep combination for photons
    config.DoCombine = "False"

    config.Options = [
        "MinEvents=200",
        "Shrinkage=0.1",
        "NTrees=1000",
        "MinSignificance=5.0",
        "EventWeight=min(1,exp(-(genPt-50)/50))",
    ]

    config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
    config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
    #config.TargetComb       = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
    config.HistoConfig = "jobs/dummy_Histo.config"

    config.CutBase = "eventNumber%2==0 && genPt<2000"
    config.CutEB = "scIsEB"
    config.CutEE = "!scIsEB"
    config.CutError = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4==3)"
    #config.CutComb          = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4!=3)"

    # # Add an additional cut so that the regression is fast
    # NtupIDcut = 200
    # config.CutBase  += ' && (NtupID<={0})'.format( NtupIDcut )
    # config.CutError += ' && (NtupID<={0})'.format( NtupIDcut )
    # config.CutComb  += ' && (NtupID<={0})'.format( NtupIDcut )

    ########################################
    # Order photon tree branches
    ########################################

    # Tree inside the input root file from which the branches are read
    #tree_gDirectory = 'een_analyzer/ElectronTree'
    tree_gDirectory = 'een_analyzer/PhotonTree'

    # ---------------------
    # Branches shared by EE and EB (order is important)
    # ---------------------

    common_vars = [

        # Same as in electron case
        'nVtx',
        'scRawEnergy',
        # 'scEta',
        # 'scPhi',
        'scEtaWidth',
        'scPhiWidth',
        'scSeedR9',
        'scSeedRawEnergy/scRawEnergy',
        # 'scSeedLeftRightAsym',
        # 'scSeedTopBottomAsym',
        'scSeedSigmaIetaIeta',
        'scSeedSigmaIetaIphi',
        'scSeedSigmaIphiIphi',
        'N_ECALClusters',

        # Cluster variables of the electron case that are not used here:
        # 'clusterMaxDR',
        # 'clusterMaxDRDPhi',
        # 'clusterMaxDRDEta',
        # 'clusterMaxDRRawEnergy/scRawEnergy',
        # 'clusterRawEnergy[0]/scRawEnergy',
        # 'clusterRawEnergy[1]/scRawEnergy',
        # 'clusterRawEnergy[2]/scRawEnergy',
        # 'clusterDPhiToSeed[0]',
        # 'clusterDPhiToSeed[1]',
        # 'clusterDPhiToSeed[2]',
        # 'clusterDEtaToSeed[0]',
        # 'clusterDEtaToSeed[1]',
        # 'clusterDEtaToSeed[2]',

        # Only for photons
        'hadronicOverEm',
        'rhoValue',
        'delEtaSeed',
        'delPhiSeed',

        'e3x3/e5x5',
        'eMax/e5x5',
        'e2nd/e5x5',
        'eTop/e5x5',
        'eBottom/e5x5',
        'eLeft/e5x5',
        'eRight/e5x5',
        'e2x5Max/e5x5',
        'e2x5Left/e5x5',
        'e2x5Right/e5x5',
        'e2x5Top/e5x5',
        'e2x5Bottom/e5x5',
    ]

    EE_vars = common_vars + [
        'scPreshowerEnergy/scRawEnergy',
        'preshowerEnergyPlane1/scRawEnergy',
        'preshowerEnergyPlane2/scRawEnergy',
        'iXCoordinate',
        'iYCoordinate',
    ]

    EB_vars = common_vars + [
        'e5x5/scSeedRawEnergy',
        'iEtaCoordinate',
        'iPhiCoordinate',
        'iEtaMod5',
        'iPhiMod2',
        'iEtaMod20',
        'iPhiMod20',
    ]

    # Single-argument print(...) works on both Python 2 and 3
    print('Using the following branches for EE:')
    print('    ' + '\n    '.join(EE_vars))
    print('Using the following branches for EB:')
    print('    ' + '\n    '.join(EB_vars))

    # Write to class
    config.Tree = tree_gDirectory
    config.VariablesEE = EE_vars
    config.VariablesEB = EB_vars

    print("\nAll branches in root file:")
    Read_branches_from_rootfile(physical_path(root_file), tree_gDirectory)

    ########################################
    # Output config file
    ########################################

    # out_filename is chosen at the top, together with the input root file
    config.Parse(out_filename)

    # Test if the config file can be read by ROOT TEnv
    print('\nReading in {0} and trying ROOT.TEnv( ..., 0 ):'.format(out_filename))
    I_TEnv = ROOT.TEnv()
    I_TEnv.ReadFile(out_filename, 0)
    I_TEnv.Print()
    print('Exited normally')
    print('=' * 70)
    print('')  # empty line, as the bare Python 2 print statement gave
# Example #5
# 0
def Make_conf(Verbose=True):
    """Create one regression Config per requested (region, particle) pair.

    The input root file, the particles, the regions, an optional name suffix
    and the --fast switch are read from the command line with argparse.
    Every Config is filled in, has ``Parse()`` called on it, and is collected.

    Args:
        Verbose: when true, print the config name and the branches chosen
            for EE and EB before the config is parsed.

    Returns:
        List of the created Config objects, ordered region first, then
        particle.
    """

    # ----------------------------------------
    # Command line
    # ----------------------------------------

    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--inputrootfile',
        '-i',
        type=str,
        help='Path to root file',
        # Previously used default:
        # '/afs/cern.ch/work/t/tklijnsm/public/CMSSW_8_0_4/src/NTuples/Ntup_Jul22_fullpt_training.root'
        default='/afs/cern.ch/work/r/rcoelhol/public/CMSSW_8_0_12/src/NTuples/Ntup_10Nov_ElectronPhoton.root',
    )

    # --particle and --region both accept one or more string values
    multi_value = dict(metavar='N', type=str, nargs='+')
    cli.add_argument('--particle',
                     help='Specify particles',
                     default=['electron', 'photon'],
                     choices=['electron', 'photon'],
                     **multi_value)
    cli.add_argument('--region',
                     help='Specify regions',
                     default=['EB', 'EE'],
                     choices=['EE', 'EB'],
                     **multi_value)
    cli.add_argument(
        '-n',
        '--name',
        type=str,
        default='NONE',
        help='Append a string at the end of the name of this config')
    cli.add_argument(
        '--fast',
        action='store_true',
        help='Change some BDT options to be faster (but maybe less precise)')
    args = cli.parse_args()

    datestr = strftime('%b%d')

    # ----------------------------------------
    # Settings shared by every config
    # ----------------------------------------

    # Kept as tuples so each config receives its own fresh list
    fast_options = (
        "MinEvents=300",
        "Shrinkage=0.15",
        "NTrees=1000",
        "MinSignificance=5.0",
        "EventWeight=1",
    )
    regular_options = (
        "MinEvents=200",
        "Shrinkage=0.1",
        "NTrees=1000",
        "MinSignificance=5.0",
        "EventWeight=1",
    )

    # Cut events (otherwise running into CPU limits)
    ntup_cut = " && NtupID<4000"

    # Agreed list on November 23:
    #
    # eval[0]  = raw_energy;
    # eval[1]  = the_sc->etaWidth();
    # eval[2]  = the_sc->phiWidth();
    # eval[3]  = full5x5_ess.e5x5/raw_energy;
    # eval[4]  = ele.hcalOverEcalBc();
    # eval[5]  = rhoValue_;
    # eval[6]  = theseed->eta() - the_sc->position().Eta();
    # eval[7]  = reco::deltaPhi( theseed->phi(),the_sc->position().Phi());
    # eval[8]  = full5x5_ess.r9;
    # eval[9]  = full5x5_ess.sigmaIetaIeta;
    # eval[10]  = full5x5_ess.sigmaIetaIphi;
    # eval[11]  = full5x5_ess.sigmaIphiIphi;
    # eval[12]  = full5x5_ess.eMax/full5x5_ess.e5x5;
    # eval[13]  = full5x5_ess.e2nd/full5x5_ess.e5x5;
    # eval[14]  = full5x5_ess.eTop/full5x5_ess.e5x5;
    # eval[15]  = full5x5_ess.eBottom/full5x5_ess.e5x5;
    # eval[16]  = full5x5_ess.eLeft/full5x5_ess.e5x5;
    # eval[17]  = full5x5_ess.eRight/full5x5_ess.e5x5;
    # eval[18]  = EcalClusterToolsT<true>::e2x5Max(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
    # eval[19]  = EcalClusterToolsT<true>::e2x5Left(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
    # eval[20]  = EcalClusterToolsT<true>::e2x5Right(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
    # eval[21]  = EcalClusterToolsT<true>::e2x5Top(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
    # eval[22]  = EcalClusterToolsT<true>::e2x5Bottom(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
    # eval[23]  = N_SATURATEDXTALS;
    # eval[24]  = std::max(0,numberOfClusters);
    # eval[25] = clusterRawEnergy[0]/raw_energy;
    # eval[26] = clusterRawEnergy[1]/raw_energy;
    # eval[27] = clusterRawEnergy[2]/raw_energy;
    # eval[28] = clusterDPhiToSeed[0];
    # eval[29] = clusterDPhiToSeed[1];
    # eval[30] = clusterDPhiToSeed[2];
    # eval[31] = clusterDEtaToSeed[0];
    # eval[32] = clusterDEtaToSeed[1];
    # eval[33] = clusterDEtaToSeed[2];
    #
    # Continuation with the ieta/iphi seed variables (cf. VariablesEB):
    # eval[34] = ieta;
    # eval[35] = iphi;
    # eval[36] = (ieta-signieta)%5;
    # eval[37] = (iphi-1)%2;
    # eval[38] = (abs(ieta)<=25)*((ieta-signieta)) + (abs(ieta)>25)*((ieta-26*signieta)%20);
    # eval[39] = (iphi-1)%20;
    #
    # Continuation with the preshower and ix/iy variables (cf. VariablesEE):
    # eval[34] = raw_es_energy/raw_energy;
    # eval[35] = the_sc->preshowerEnergyPlane1()/raw_energy;
    # eval[36] = the_sc->preshowerEnergyPlane2()/raw_energy;
    # eval[37] = eeseedid.ix();
    # eval[38] = eeseedid.iy();
    #
    # NOTE(review): the list above puts the preshower ratio before ix/iy,
    # while VariablesEE below lists ix/iy first -- confirm which order the
    # evaluation code expects.

    shared_branches = (
        # Common variables
        'scRawEnergy',
        'scEtaWidth',
        'scPhiWidth',
        'full5x5_e5x5/scRawEnergy',
        'hadronicOverEm',
        'rhoValue',
        'delEtaSeed',
        'delPhiSeed',

        # Showershape variables
        'full5x5_r9',
        'full5x5_sigmaIetaIeta',
        'full5x5_sigmaIetaIphi',
        'full5x5_sigmaIphiIphi',
        'full5x5_eMax/full5x5_e5x5',
        'full5x5_e2nd/full5x5_e5x5',
        'full5x5_eTop/full5x5_e5x5',
        'full5x5_eBottom/full5x5_e5x5',
        'full5x5_eLeft/full5x5_e5x5',
        'full5x5_eRight/full5x5_e5x5',
        'full5x5_e2x5Max/full5x5_e5x5',
        'full5x5_e2x5Left/full5x5_e5x5',
        'full5x5_e2x5Right/full5x5_e5x5',
        'full5x5_e2x5Top/full5x5_e5x5',
        'full5x5_e2x5Bottom/full5x5_e5x5',

        # Saturation variables
        'N_SATURATEDXTALS',

        # Cluster variables
        'N_ECALClusters',
        'clusterRawEnergy[0]/scRawEnergy',
        'clusterRawEnergy[1]/scRawEnergy',
        'clusterRawEnergy[2]/scRawEnergy',
        'clusterDPhiToSeed[0]',
        'clusterDPhiToSeed[1]',
        'clusterDPhiToSeed[2]',
        'clusterDEtaToSeed[0]',
        'clusterDEtaToSeed[1]',
        'clusterDEtaToSeed[2]',
    )

    # ----------------------------------------
    # One config per (region, particle)
    # ----------------------------------------

    return_configs = []
    combos = [(reg, part) for reg in args.region for part in args.particle]

    for region, particle in combos:

        # Instantiate the Config class which prints a .config file
        config = Config()

        base_name = 'Config_{0}_{1}_{2}'.format(datestr, particle, region)
        if args.name and args.name != 'NONE':
            base_name += '_' + args.name
        config.Name = base_name

        config.InputFiles = os.path.abspath(args.inputrootfile)
        config.Tree = 'een_analyzer/{0}Tree'.format(particle.capitalize())

        # BDT settings; the fast variant is also flagged in the config name
        if args.fast:
            config.Options = list(fast_options)
            config.Name += '_FastOptions'
        else:
            config.Options = list(regular_options)

        config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"

        # Probably not needed
        config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
        config.HistoConfig = "jobs/dummy_Histo.config"

        config.CutEB = "scIsEB"
        config.CutEE = "!scIsEB"

        config.DoEB = "True" if region == 'EB' else "False"

        # Sample division. CutComb and CutError belong to the old (regular
        # BDT) EP combination, which is no longer needed.
        config.CutBase = '1.0' + ntup_cut
        config.CutComb = '1.0' + ntup_cut
        config.CutError = '1.0' + ntup_cut

        # EB specific
        config.VariablesEB = list(shared_branches) + [
            'iEtaCoordinate',
            'iPhiCoordinate',
            'iEtaMod5',
            'iPhiMod2',
            'iEtaMod20',
            'iPhiMod20',
        ]

        # EE specific
        config.VariablesEE = list(shared_branches) + [
            'iXCoordinate',
            'iYCoordinate',
            'scPreshowerEnergy/scRawEnergy',
            # 'preshowerEnergyPlane1/scRawEnergy', # Disabled as of November 2016 (did not influence regression)
            # 'preshowerEnergyPlane2/scRawEnergy', # Disabled as of November 2016 (did not influence regression)
        ]

        if Verbose:
            report = [
                '\n' + '-' * 70,
                'Making config file ' + config.Name + '.config',
                '  Using the following branches for EE:',
                '    ' + '\n    '.join(config.VariablesEE),
                '  Using the following branches for EB:',
                '    ' + '\n    '.join(config.VariablesEB),
            ]
            print('\n'.join(report))

        # NOVEMBER 25: NO LONGER NECESSARY TO RUN OLD EP COMBO
        config.DoCombine = "False"
        config.DoErrors = "False"

        config.Parse()
        return_configs.append(config)

    return return_configs
# Example #6
# 0
def Make_conf(Verbose=True):
    """Create the '_SAMETGT' regression configs from the 22 July ntuple.

    The ntuple directory is chosen from the HOSTNAME environment variable
    (a dedicated path on 't3ui17', the AFS area otherwise, including when
    HOSTNAME is not set at all).  For every region (EB, EE) and every
    particle that is not skipped below, a Config is filled in,
    ``config.Parse()`` is called on it, and it is collected.

    MODIFIED VERSION with respect to the generic config maker:
      - only ECAL_AND_TRK configs are made (photons have no TRK variables
        and are therefore skipped),
      - the target is the plain, unweighted one.

    Args:
        Verbose: when true, print the config name and the branches used for
            EE and EB.

    Returns:
        List of the Config objects that were created.
    """

    # Small testing samples -- do NOT use these for plots!
    #    root_file = 'Ntup_Jun22_lowpt_testing_sample.root'
    # Low + high pt sample
    #    root_file = 'Ntup_Jun22_fullpt_training.root'
    # Only low pt sample
    #    root_file = 'Ntup_Jun22_lowpt_training.root'
    # Older full-pt sample
    #    root_file = 'Ntup_Jul15_fullpt_training.root'

    # ------------------------------
    # 22 July samples - Latest set of branches
    # root_file = 'Ntup_Jul22_fullpt_testing_sample.root' # ONLY FOR QUICK TESTS
    root_file = 'Ntup_Jul22_fullpt_training.root'

    # Other locations used in the past:
    # ntup_path = os.path.join( '/data/userdata/rclsa/ElectronTrees/Jul17/' )
    # ntup_path = os.path.join( os.environ['CMSSW_BASE'], 'src/NTuples' )

    # HOSTNAME is not always exported to child processes, so look it up
    # with .get(): a missing variable selects the AFS path instead of
    # aborting with a KeyError.
    if os.environ.get('HOSTNAME') == 't3ui17':
        ntup_path = os.path.join(
            '/mnt/t3nfs01/data01/shome/tklijnsm/Samples/RegressionSamples',
            '22Jul_samples')
    else:
        ntup_path = '/afs/cern.ch/work/t/tklijnsm/public/CMSSW_8_0_4/src/NTuples'

    datestr = strftime('%b%d')

    # Only a warning: the configs are still produced when the directory is
    # not reachable from this machine.
    if not os.path.isdir(ntup_path):
        print('Error: "{0}" is not a directory'.format(ntup_path))
    input_file = os.path.join(ntup_path, root_file)

    return_configs = []

    for region in ['EB', 'EE']:

        # for ECAL_AND_TRK in [ False, True ]:
        for ECAL_AND_TRK in [True]:

            for particle in ['electron', 'photon']:
                if ECAL_AND_TRK and particle == 'photon':
                    continue  # Photon doesn't have TRK vars

                # Instantiate the Config class which prints a .config file
                config = Config()

                config.Name = 'Config_' + datestr + '_' + particle + '_' + region

                # MODIFIED: Force SAMETGT name
                # if ECAL_AND_TRK:
                #     config.Name += '_ECALTRK'
                # else:
                #     config.Name += '_ECALonly'
                config.Name += '_SAMETGT'

                config.InputFiles = input_file
                config.Tree = 'een_analyzer/{0}Tree'.format(
                    particle.capitalize())

                ########################################
                # BDT settings
                ########################################

                config.Options = [
                    "MinEvents=200",
                    "Shrinkage=0.1",
                    # "NTrees=2000", # <-- Moved up from 1000 to include extra tracker effects
                    "NTrees=1000",
                    "MinSignificance=5.0",
                    # "EventWeight=max( min(1,exp(-(genPt-50)/50)), 0.1 )", # <-- What to do?
                    "EventWeight=1",  # <-- No one really likes the weights
                ]

                # MODIFIED: Just force the normal target

                # # Set the target - be careful to include the tracker energy in the target for the Ep combination
                # if ECAL_AND_TRK:
                #     config.Target           = "genEnergy * (ECALweight + TRKweight) / ( scRawEnergy*ECALweight + scPreshowerEnergy*ECALweight + trkMomentum*TRKweight )"
                # else:
                #     config.Target           = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"

                config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"

                config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
                config.HistoConfig = "jobs/dummy_Histo.config"

                config.CutEB = "scIsEB"
                config.CutEE = "!scIsEB"

                config.DoEB = "True" if region == 'EB' else "False"

                # ======================================
                # Sample division - need a part for the ECAL-only training, and a part for the combination

                # Divide the sample in divideNumber pieces and use all but
                # one piece for the main BDT.  With divideNumber = 3 that is
                # about 2/3 of the events for the main BDT ...
                divideNumber = 3
                config.CutBase = "eventNumber%{0}!=0".format(divideNumber)

                # ... and the remaining third is halved: about 1/6 for the
                # combination, about 1/6 for the error.
                config.CutComb = "eventNumber%{0}==0 && eventNumber%{1}==0".format(
                    divideNumber, 2 * divideNumber)
                config.CutError = "eventNumber%{0}==0 && eventNumber%{1}!=0".format(
                    divideNumber, 2 * divideNumber)

                config.CutBase += " && NtupID<5000"
                config.CutComb += " && NtupID<5000"
                config.CutError += " && NtupID<5000"

                ########################################
                # Order tree branches
                ########################################

                common_vars = [

                    # ======================================
                    # Common variables

                    # 'pt',            # RCLSA: you cannot use the result of the previous training for the new one
                    # 'nVtx',          # rho should be enough information for the BDT
                    # 'scEta',         # Requires alignment information; use crystal number of the seed instead
                    # 'scPhi',         # Requires alignment information; use crystal number of the seed instead
                    # 'scSeedRawEnergy/scRawEnergy',  # RCLSA: Redundant with the one below
                    'scRawEnergy',
                    'scEtaWidth',
                    'scPhiWidth',
                    'full5x5_e5x5/scRawEnergy',
                    'hadronicOverEm',
                    'rhoValue',
                    'delEtaSeed',
                    'delPhiSeed',

                    # ======================================
                    # Showershape variables

                    # Use full 5x5 instead of:
                    # 'r9', 'eHorizontal', 'eVertical',
                    # 'sigmaIetaIeta', 'sigmaIetaIphi', 'sigmaIphiIphi',
                    # 'e5x5', 'e3x3', 'eMax', 'e2nd',
                    # 'eTop', 'eBottom', 'eLeft', 'eRight',
                    # 'e2x5Max', 'e2x5Left', 'e2x5Right', 'e2x5Top', 'e2x5Bottom',

                    # Normalization to scRawEnergy necessary?
                    'full5x5_r9',
                    # 'full5x5_eHorizontal',   # RCLSA: Redundant
                    # 'full5x5_eVertical',     # RCLSA: Redundant
                    'full5x5_sigmaIetaIeta',
                    'full5x5_sigmaIetaIphi',
                    'full5x5_sigmaIphiIphi',
                    # 'full5x5_e5x5',               # RCLSA: Use ratios
                    # 'full5x5_e3x3/full5x5_e5x5',  # RCLSA: Redundant, this is R9
                    'full5x5_eMax/full5x5_e5x5',
                    'full5x5_e2nd/full5x5_e5x5',
                    'full5x5_eTop/full5x5_e5x5',
                    'full5x5_eBottom/full5x5_e5x5',
                    'full5x5_eLeft/full5x5_e5x5',
                    'full5x5_eRight/full5x5_e5x5',
                    'full5x5_e2x5Max/full5x5_e5x5',
                    'full5x5_e2x5Left/full5x5_e5x5',
                    'full5x5_e2x5Right/full5x5_e5x5',
                    'full5x5_e2x5Top/full5x5_e5x5',
                    'full5x5_e2x5Bottom/full5x5_e5x5',

                    # ======================================
                    # Saturation variables
                    'N_SATURATEDXTALS',
                    # 'seedIsSaturated',   # RCLSA: probably overkill
                    # 'seedCrystalEnergy/scSeedRawEnergy',   # RCLSA: There is only 1/1e6 cases in which the max energy is not the seed

                    # ======================================
                    # Cluster variables
                    'N_ECALClusters',
                    # 'clusterMaxDR',          # RCLSA Very mismodelled variables
                    # 'clusterMaxDRDPhi',
                    # 'clusterMaxDRDEta',
                    # 'clusterMaxDRRawEnergy',
                    'clusterRawEnergy[0]/scRawEnergy',
                    'clusterRawEnergy[1]/scRawEnergy',
                    'clusterRawEnergy[2]/scRawEnergy',
                    'clusterDPhiToSeed[0]',
                    'clusterDPhiToSeed[1]',
                    'clusterDPhiToSeed[2]',
                    'clusterDEtaToSeed[0]',
                    'clusterDEtaToSeed[1]',
                    'clusterDEtaToSeed[2]',
                ]

                if ECAL_AND_TRK:
                    # ADD THE TRK VARIABLES TO THE MAIN BDT
                    # Output should be compared to ECAL-only BDT output
                    common_vars += [

                        # '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
                        # This is simply the corrected energy; probably no need to pass it
                        # (the BDT already has the target in there)

                        # 'BDTerror/BDTresponse',
                        # The error probably does not have to be passed either -- the BDT
                        # will already be conscious of the error
                        'trkMomentumRelError',

                        # Replace this simply by the trkMomentum only
                        # 'trkMomentum/(( scRawEnergy + scPreshowerEnergy )*BDTresponse)',
                        'trkMomentum',
                        'eleEcalDriven',
                        # 'full5x5_r9',
                        'fbrem',
                        'gsfchi2',
                        'gsfndof',
                        'trkEta',
                        'trkPhi',
                    ]

                config.VariablesEB = common_vars + [
                    # 'cryEtaCoordinate',  # Requires alignment information; use crystal number of the seed instead
                    # 'cryPhiCoordinate',  # Requires alignment information; use crystal number of the seed instead
                    'iEtaCoordinate',
                    'iPhiCoordinate',
                    'iEtaMod5',
                    'iPhiMod2',
                    'iEtaMod20',
                    'iPhiMod20',
                ]

                config.VariablesEE = common_vars + [
                    # 'cryXCoordinate',  # Requires alignment information; use crystal number of the seed instead
                    # 'cryYCoordinate',  # Requires alignment information; use crystal number of the seed instead
                    'iXCoordinate',
                    'iYCoordinate',
                    'scPreshowerEnergy/scRawEnergy',
                    'preshowerEnergyPlane1/scRawEnergy',
                    'preshowerEnergyPlane2/scRawEnergy',
                ]

                if Verbose:
                    print('\n' + '-' * 70)
                    print('Making config file ' + config.Name + '.config')
                    print('  Using the following branches for EE:')
                    print('    ' + '\n    '.join(config.VariablesEE))
                    print('  Using the following branches for EB:')
                    print('    ' + '\n    '.join(config.VariablesEB))

                ########################################
                # Ep combination
                ########################################

                # Only do the combination for the electron AND there are no tracking variables.
                # While the ECAL_AND_TRK loop above is fixed to [True] this
                # branch is never taken; it is kept so the ECAL-only variant
                # can be switched back on by editing that loop.
                if particle == 'electron' and not ECAL_AND_TRK:

                    config.DoCombine = "True"

                    config.TargetComb = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
                    config.TargetError = "1.253*abs(BDTresponse - genEnergy/(scRawEnergy+scPreshowerEnergy))"

                    config.VariablesComb = [
                        '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
                        'BDTerror/BDTresponse',
                        'trkMomentumRelError',
                        'trkMomentum/(( scRawEnergy + scPreshowerEnergy )*BDTresponse)',
                        'eleEcalDriven',
                        'full5x5_r9',
                        'fbrem',
                        'gsfchi2',
                        'gsfndof',
                        'trkEta',
                        'trkPhi',  # The best way to describe cracks is to use the track (unbiased) direction
                        # 'trkMomentum',                                # RCLSA Again, let us choose one absolute scale and the rest be relative
                        # 'BDTerror/BDTresponse/trkMomentumRelError',
                        # ( '( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum  *' +
                        #   'sqrt( BDTerror/BDTresponse*BDTerror/BDTresponse + trkMomentumRelError*trkMomentumRelError)' ),
                        # 'eleClass',
                        # 'scIsEB',
                    ]

                else:
                    # Not necessary if the TRK vars are in the main BDT
                    config.DoCombine = "False"

                ########################################
                # Output
                ########################################

                # To print all branches as a check:
                #     Read_branches_from_rootfile( input_file, config.Tree )

                config.Parse()

                return_configs.append(config)

    return return_configs
def Make_conf(Verbose=True):
    """Produce the regression Config objects selected on the command line.

    argparse supplies the input root file, the list of particles, the list
    of regions, an optional name suffix and the --fast flag.  A Config is
    built for each region/particle combination, ``Parse()`` is called on it
    and all of them are returned.

    Args:
        Verbose: when true, print each config name together with the EE and
            EB branch lists.

    Returns:
        List of Config objects, regions in the outer order and particles in
        the inner order.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--inputrootfile', '-i',
        type=str,
        help='Path to root file',
        # Older default:
        # '/afs/cern.ch/work/t/tklijnsm/public/CMSSW_8_0_4/src/NTuples/Ntup_Jul22_fullpt_training.root'
        default='/afs/cern.ch/work/r/rcoelhol/public/CMSSW_8_0_12/src/NTuples/Ntup_10Nov_ElectronPhoton.root',
    )
    parser.add_argument(
        '--particle',
        metavar='N',
        type=str,
        nargs='+',
        help='Specify particles',
        default=['electron', 'photon'],
        choices=['electron', 'photon'],
    )
    parser.add_argument(
        '--region',
        metavar='N',
        type=str,
        nargs='+',
        help='Specify regions',
        default=['EB', 'EE'],
        choices=['EE', 'EB'],
    )
    parser.add_argument(
        '-n', '--name',
        type=str,
        default='NONE',
        help='Append a string at the end of the name of this config',
    )
    parser.add_argument(
        '--fast',
        action='store_true',
        help='Change some BDT options to be faster (but maybe less precise)',
    )
    args = parser.parse_args()

    datestr = strftime('%b%d')

    # BDT settings, looked up by whether --fast was given
    bdt_options = {
        True: (
            "MinEvents=300",
            "Shrinkage=0.15",
            "NTrees=1000",
            "MinSignificance=5.0",
            "EventWeight=1",
        ),
        False: (
            "MinEvents=200",
            "Shrinkage=0.1",
            "NTrees=1000",
            "MinSignificance=5.0",
            "EventWeight=1",
        ),
    }

    # Agreed list on November 23:
    #
    # eval[0]  = raw_energy;
    # eval[1]  = the_sc->etaWidth();
    # eval[2]  = the_sc->phiWidth();
    # eval[3]  = full5x5_ess.e5x5/raw_energy;
    # eval[4]  = ele.hcalOverEcalBc();
    # eval[5]  = rhoValue_;
    # eval[6]  = theseed->eta() - the_sc->position().Eta();
    # eval[7]  = reco::deltaPhi( theseed->phi(),the_sc->position().Phi());
    # eval[8]  = full5x5_ess.r9;
    # eval[9]  = full5x5_ess.sigmaIetaIeta;
    # eval[10]  = full5x5_ess.sigmaIetaIphi;
    # eval[11]  = full5x5_ess.sigmaIphiIphi;
    # eval[12]  = full5x5_ess.eMax/full5x5_ess.e5x5;
    # eval[13]  = full5x5_ess.e2nd/full5x5_ess.e5x5;
    # eval[14]  = full5x5_ess.eTop/full5x5_ess.e5x5;
    # eval[15]  = full5x5_ess.eBottom/full5x5_ess.e5x5;
    # eval[16]  = full5x5_ess.eLeft/full5x5_ess.e5x5;
    # eval[17]  = full5x5_ess.eRight/full5x5_ess.e5x5;
    # eval[18]  = EcalClusterToolsT<true>::e2x5Max(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
    # eval[19]  = EcalClusterToolsT<true>::e2x5Left(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
    # eval[20]  = EcalClusterToolsT<true>::e2x5Right(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
    # eval[21]  = EcalClusterToolsT<true>::e2x5Top(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
    # eval[22]  = EcalClusterToolsT<true>::e2x5Bottom(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
    # eval[23]  = N_SATURATEDXTALS;
    # eval[24]  = std::max(0,numberOfClusters);
    # eval[25] = clusterRawEnergy[0]/raw_energy;
    # eval[26] = clusterRawEnergy[1]/raw_energy;
    # eval[27] = clusterRawEnergy[2]/raw_energy;
    # eval[28] = clusterDPhiToSeed[0];
    # eval[29] = clusterDPhiToSeed[1];
    # eval[30] = clusterDPhiToSeed[2];
    # eval[31] = clusterDEtaToSeed[0];
    # eval[32] = clusterDEtaToSeed[1];
    # eval[33] = clusterDEtaToSeed[2];
    #
    # Continuation with the ieta/iphi seed variables (cf. VariablesEB):
    # eval[34] = ieta;
    # eval[35] = iphi;
    # eval[36] = (ieta-signieta)%5;
    # eval[37] = (iphi-1)%2;
    # eval[38] = (abs(ieta)<=25)*((ieta-signieta)) + (abs(ieta)>25)*((ieta-26*signieta)%20);
    # eval[39] = (iphi-1)%20;
    #
    # Continuation with the preshower and ix/iy variables (cf. VariablesEE):
    # eval[34] = raw_es_energy/raw_energy;
    # eval[35] = the_sc->preshowerEnergyPlane1()/raw_energy;
    # eval[36] = the_sc->preshowerEnergyPlane2()/raw_energy;
    # eval[37] = eeseedid.ix();
    # eval[38] = eeseedid.iy();

    def common_branches():
        """Return a new list with the branches shared by EB and EE."""
        general = [
            'scRawEnergy',
            'scEtaWidth',
            'scPhiWidth',
            'full5x5_e5x5/scRawEnergy',
            'hadronicOverEm',
            'rhoValue',
            'delEtaSeed',
            'delPhiSeed',
        ]
        showershape = [
            'full5x5_r9',
            'full5x5_sigmaIetaIeta',
            'full5x5_sigmaIetaIphi',
            'full5x5_sigmaIphiIphi',
            'full5x5_eMax/full5x5_e5x5',
            'full5x5_e2nd/full5x5_e5x5',
            'full5x5_eTop/full5x5_e5x5',
            'full5x5_eBottom/full5x5_e5x5',
            'full5x5_eLeft/full5x5_e5x5',
            'full5x5_eRight/full5x5_e5x5',
            'full5x5_e2x5Max/full5x5_e5x5',
            'full5x5_e2x5Left/full5x5_e5x5',
            'full5x5_e2x5Right/full5x5_e5x5',
            'full5x5_e2x5Top/full5x5_e5x5',
            'full5x5_e2x5Bottom/full5x5_e5x5',
        ]
        saturation = ['N_SATURATEDXTALS']
        clusters = [
            'N_ECALClusters',
            'clusterRawEnergy[0]/scRawEnergy',
            'clusterRawEnergy[1]/scRawEnergy',
            'clusterRawEnergy[2]/scRawEnergy',
            'clusterDPhiToSeed[0]',
            'clusterDPhiToSeed[1]',
            'clusterDPhiToSeed[2]',
            'clusterDEtaToSeed[0]',
            'clusterDEtaToSeed[1]',
            'clusterDEtaToSeed[2]',
        ]
        return general + showershape + saturation + clusters

    def build_config(region, particle):
        """Fill, parse and return the Config for one region/particle pair."""

        # Instantiate the Config class which prints a .config file
        config = Config()

        pieces = ['Config_' + datestr, particle, region]
        if args.name and args.name != 'NONE':
            pieces.append(args.name)
        config.Name = '_'.join(pieces)

        config.InputFiles = os.path.abspath(args.inputrootfile)
        config.Tree = 'een_analyzer/{0}Tree'.format(particle.capitalize())

        # A fresh list per config; fast configs are marked in their name
        config.Options = list(bdt_options[bool(args.fast)])
        if args.fast:
            config.Name += '_FastOptions'

        config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"

        # Probably not needed
        config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
        config.HistoConfig = "jobs/dummy_Histo.config"

        config.CutEB = "scIsEB"
        config.CutEE = "!scIsEB"

        if region != 'EB':
            config.DoEB = "False"
        else:
            config.DoEB = "True"

        # Sample division: no splitting, only a cap on NtupID to cut events
        # (otherwise running into CPU limits).  CutComb and CutError are for
        # the old (regular BDT) EP combination - no longer needed.
        capped = '1.0' + " && NtupID<4000"
        config.CutBase = capped
        config.CutComb = capped
        config.CutError = capped

        common_vars = common_branches()

        # EB specific
        config.VariablesEB = common_vars + [
            'iEtaCoordinate',
            'iPhiCoordinate',
            'iEtaMod5',
            'iPhiMod2',
            'iEtaMod20',
            'iPhiMod20',
        ]

        # EE specific
        config.VariablesEE = common_vars + [
            'iXCoordinate',
            'iYCoordinate',
            'scPreshowerEnergy/scRawEnergy',
            # 'preshowerEnergyPlane1/scRawEnergy', # Disabled as of November 2016 (did not influence regression)
            # 'preshowerEnergyPlane2/scRawEnergy', # Disabled as of November 2016 (did not influence regression)
        ]

        if Verbose:
            print('\n' + '-' * 70)
            print('Making config file ' + config.Name + '.config')
            for label, branches in (('EE', config.VariablesEE),
                                    ('EB', config.VariablesEB)):
                print('  Using the following branches for ' + label + ':')
                print('    ' + '\n    '.join(branches))

        # NOVEMBER 25: NO LONGER NECESSARY TO RUN OLD EP COMBO
        config.DoCombine = "False"
        config.DoErrors = "False"

        config.Parse()
        return config

    return [
        build_config(region, particle)
        for region in args.region
        for particle in args.particle
    ]
def main():
    """Build the electron regression config and hand it to ``Config.Parse``.

    Fills a ``Config`` instance with hard-coded BDT options, regression
    targets, event cuts and the EE/EB/combination input variables, prints the
    selected EE and EB branches, calls ``Read_branches_from_rootfile`` on the
    input file, passes ``'electron_config.config'`` to ``config.Parse`` and
    finally reads that file back through ``ROOT.TEnv`` as a sanity check.

    Takes no arguments and returns ``None``.  A missing ntuple directory is
    only reported on stdout; the function deliberately carries on afterwards.
    """

    # Instantiate the Config class which prints a .config file
    config = Config()

    config.Name = 'electronConfig'

    # File name of the input root file.  Previously used samples:
    #   FlatNtupFull_13May_SepTrees.root
    #   FlatNtupFull_18May_DoubleElectron.root
    #   Ntup_20May_DoubleElectron.root
    #   Ntup_30May_DoublePhoton_somefailed.root
    #   Ntup_01June_DoubleElectron.root
    root_file = 'Ntup_05June_electrons_LowHighPt.root'

    ntup_path = os.path.abspath('../../NTuples/')

    # For iterating:
    #   root_file = 'output.root'
    #   root_file = 'PhotonTest_01June.root'
    #   ntup_path = '/afs/cern.ch/work/t/tklijnsm/EGM/CMSSW_8_0_4/src/SimpleFlatTreeProducer/SimpleNtuplizer/cfgs/'

    if not os.path.isdir(ntup_path):
        # Best effort: warn only, the config is still produced below.
        print('Error: "{0}" is not a directory'.format(ntup_path))

    # Full path of the input ntuple; used for the config and the branch dump.
    root_path = os.path.join(ntup_path, root_file)

    ########################################
    # BDT settings
    ########################################

    config.InputFiles = root_path

    config.Options = [
        "MinEvents=200",
        "Shrinkage=0.1",
        "NTrees=1000",
        "MinSignificance=5.0",
        "EventWeight=min(1,exp(-(genPt-50)/50))",
    ]

    config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
    config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
    config.TargetComb = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
    config.HistoConfig = "jobs/dummy_Histo.config"

    # Even event numbers feed the base cut; odd ones are split between the
    # error cut (remainder 3) and the combination cut (all other remainders).
    config.CutBase = "eventNumber%2==0 && genPt<2000"
    config.CutEB = "scIsEB"
    config.CutEE = "!scIsEB"
    config.CutError = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4==3)"
    config.CutComb = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4!=3)"

    # Add an additional cut so that the regression is fast
    # NtupIDcut = 10000
    # config.CutBase  += ' && (NtupID<={0})'.format( NtupIDcut )
    # config.CutError += ' && (NtupID<={0})'.format( NtupIDcut )
    # config.CutComb  += ' && (NtupID<={0})'.format( NtupIDcut )

    ########################################
    # Order Electron tree branches
    ########################################

    # Tree inside the input root file (photons: 'een_analyzer/PhotonTree')
    tree_gDirectory = 'een_analyzer/ElectronTree'

    # Variables shared by the endcap (EE) and barrel (EB) trainings
    common_vars = [
        'nVtx',
        'scRawEnergy',
        'scEta',
        'scPhi',
        'scEtaWidth',
        'scPhiWidth',
        'scSeedR9',
        'scSeedRawEnergy/scRawEnergy',
        'scSeedEmax',
        'scSeedE2nd',
        'scSeedLeftRightAsym',
        'scSeedTopBottomAsym',
        'scSeedSigmaIetaIeta',
        'scSeedSigmaIetaIphi',
        'scSeedSigmaIphiIphi',
        'N_ECALClusters',
        'clusterMaxDR',
        'clusterMaxDRDPhi',
        'clusterMaxDRDEta',
        'clusterMaxDRRawEnergy/scRawEnergy',
        'clusterRawEnergy[0]/scRawEnergy',
        'clusterRawEnergy[1]/scRawEnergy',
        'clusterRawEnergy[2]/scRawEnergy',
        'clusterDPhiToSeed[0]',
        'clusterDPhiToSeed[1]',
        'clusterDPhiToSeed[2]',
        'clusterDEtaToSeed[0]',
        'clusterDEtaToSeed[1]',
        'clusterDEtaToSeed[2]',
    ]

    EE_vars = common_vars + [
        'scPreshowerEnergy/scRawEnergy',
        'scSeedCryIxV2',
        'scSeedCryIyV2',
    ]

    EB_vars = common_vars + [
        'scSeedCryEta',
        'scSeedCryPhi',
        'scSeedCryIetaV2',
        'scSeedCryIphiV2',
    ]

    # Single-argument parenthesized print: same output under Python 2's
    # print statement, and also valid as a Python 3 function call.
    print('Using the following branches for EE:')
    print('    ' + '\n    '.join(EE_vars))
    print('Using the following branches for EB:')
    print('    ' + '\n    '.join(EB_vars))

    # Write to class
    config.Tree = tree_gDirectory
    config.VariablesEE = EE_vars
    config.VariablesEB = EB_vars

    config.VariablesComb = [
        '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
        'BDTerror/BDTresponse',
        'trkMomentum',
        'trkMomentumRelError',
        'BDTerror/BDTresponse/trkMomentumRelError',
        '( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum',
        ('( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum  *' +
         'sqrt( BDTerror/BDTresponse*BDTerror/BDTresponse + trkMomentumRelError*trkMomentumRelError)'
         ),
        # Older branch names: 'ecalDriven', 'trackerDrivenSeed', 'classification'
        'eleEcalDriven',
        'eleTrackerDriven',
        'eleClass',
        'scIsEB',
    ]

    print("\nAll branches in root file:")
    Read_branches_from_rootfile(root_path, tree_gDirectory)

    ########################################
    # Output config file
    ########################################

    out_filename = 'electron_config.config'
    config.Parse(out_filename)

    # Test if the config file can be read by ROOT TEnv
    print('\nReading in {0} and trying ROOT.TEnv( ..., 0 ):'.format(
        out_filename))
    tenv = ROOT.TEnv()
    tenv.ReadFile(out_filename, 0)
    tenv.Print()
    print('Exited normally')
    print('=' * 70)
    print('')
def main():
    """Build the 'OLDVARS' electron regression config for the full-pt sample.

    Fills a ``Config`` instance with hard-coded BDT options, regression
    targets, event cuts and the EE/EB/combination input variables, prints the
    selected EE and EB branches, calls ``Read_branches_from_rootfile`` on the
    input file, passes the output file name to ``config.Parse`` and finally
    reads that file back through ``ROOT.TEnv`` as a sanity check.

    Takes no arguments and returns ``None``.  A missing ntuple directory is
    only reported on stdout; the function deliberately carries on afterwards.
    """

    # Instantiate the Config class which prints a .config file
    config = Config()

    # File name of the input root file.  Previously used samples:
    #   FlatNtupFull_13May_SepTrees.root
    #   FlatNtupFull_18May_DoubleElectron.root
    #   Ntup_20May_DoubleElectron.root
    #   Ntup_30May_DoublePhoton_somefailed.root
    #   Ntup_01June_DoubleElectron.root
    #   Ntup_05June_electrons_LowHighPt.root
    #   Ntup_Jun22_lowpt_training.root
    root_file = 'Ntup_Jun22_fullpt_training.root'

    # Low-pt counterpart: 'Config_24Jun_electron_lowpt_OLDVARS.config'
    out_filename = 'Config_24Jun_electron_fullpt_OLDVARS.config'

    # The config name is the output file name without its extension
    config.Name = out_filename.replace('.config', '')

    ntup_path = os.path.abspath('../../NTuples/')

    # For iterating:
    #   root_file = 'output.root'
    #   root_file = 'PhotonTest_01June.root'
    #   ntup_path = '/afs/cern.ch/work/t/tklijnsm/EGM/CMSSW_8_0_4/src/SimpleFlatTreeProducer/SimpleNtuplizer/cfgs/'

    if not os.path.isdir(ntup_path):
        # Best effort: warn only, the config is still produced below.
        print('Error: "{0}" is not a directory'.format(ntup_path))

    # Full path of the input ntuple; used for the config and the branch dump.
    root_path = os.path.join(ntup_path, root_file)

    ########################################
    # BDT settings
    ########################################

    config.InputFiles = root_path

    config.Options = [
        "MinEvents=200",
        "Shrinkage=0.1",
        "NTrees=1000",
        "MinSignificance=5.0",
        "EventWeight=min(1,exp(-(genPt-50)/50))",
    ]

    config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
    config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
    config.TargetComb = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
    config.HistoConfig = "jobs/dummy_Histo.config"

    # Even event numbers feed the base cut; odd ones are split between the
    # error cut (remainder 3) and the combination cut (all other remainders).
    config.CutBase = "eventNumber%2==0 && genPt<2000"
    config.CutEB = "scIsEB"
    config.CutEE = "!scIsEB"
    config.CutError = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4==3)"
    config.CutComb = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4!=3)"

    # Add an additional cut so that the regression is fast
    # NtupIDcut = 10000
    # config.CutBase  += ' && (NtupID<={0})'.format( NtupIDcut )
    # config.CutError += ' && (NtupID<={0})'.format( NtupIDcut )
    # config.CutComb  += ' && (NtupID<={0})'.format( NtupIDcut )

    ########################################
    # Order Electron tree branches
    ########################################

    # Tree inside the input root file (photons: 'een_analyzer/PhotonTree')
    tree_gDirectory = 'een_analyzer/ElectronTree'

    # Variables shared by the endcap (EE) and barrel (EB) trainings
    common_vars = [
        'nVtx',
        'scRawEnergy',
        'scEta',
        'scPhi',
        'scEtaWidth',
        'scPhiWidth',
        'scSeedR9',
        'scSeedRawEnergy/scRawEnergy',
        'scSeedEmax',
        'scSeedE2nd',
        'scSeedLeftRightAsym',
        'scSeedTopBottomAsym',
        'scSeedSigmaIetaIeta',
        'scSeedSigmaIetaIphi',
        'scSeedSigmaIphiIphi',
        'N_ECALClusters',
        'clusterMaxDR',
        'clusterMaxDRDPhi',
        'clusterMaxDRDEta',
        'clusterMaxDRRawEnergy/scRawEnergy',
        'clusterRawEnergy[0]/scRawEnergy',
        'clusterRawEnergy[1]/scRawEnergy',
        'clusterRawEnergy[2]/scRawEnergy',
        'clusterDPhiToSeed[0]',
        'clusterDPhiToSeed[1]',
        'clusterDPhiToSeed[2]',
        'clusterDEtaToSeed[0]',
        'clusterDEtaToSeed[1]',
        'clusterDEtaToSeed[2]',
    ]

    EE_vars = common_vars + [
        'scPreshowerEnergy/scRawEnergy',
        'scSeedCryIxV2',
        'scSeedCryIyV2',
    ]

    EB_vars = common_vars + [
        'scSeedCryEta',
        'scSeedCryPhi',
        'scSeedCryIetaV2',
        'scSeedCryIphiV2',
    ]

    # Single-argument parenthesized print: same output under Python 2's
    # print statement, and also valid as a Python 3 function call.
    print('Using the following branches for EE:')
    print('    ' + '\n    '.join(EE_vars))
    print('Using the following branches for EB:')
    print('    ' + '\n    '.join(EB_vars))

    # Write to class
    config.Tree = tree_gDirectory
    config.VariablesEE = EE_vars
    config.VariablesEB = EB_vars

    config.VariablesComb = [
        '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
        'BDTerror/BDTresponse',
        'trkMomentum',
        'trkMomentumRelError',
        'BDTerror/BDTresponse/trkMomentumRelError',
        '( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum',
        ('( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum  *' +
         'sqrt( BDTerror/BDTresponse*BDTerror/BDTresponse + trkMomentumRelError*trkMomentumRelError)'),
        # Older branch names: 'ecalDriven', 'trackerDrivenSeed', 'classification'
        'eleEcalDriven',
        'eleTrackerDriven',
        'eleClass',
        'scIsEB',
    ]

    print("\nAll branches in root file:")
    Read_branches_from_rootfile(root_path, tree_gDirectory)

    ########################################
    # Output config file
    ########################################

    config.Parse(out_filename)

    # Test if the config file can be read by ROOT TEnv
    print('\nReading in {0} and trying ROOT.TEnv( ..., 0 ):'.format(out_filename))
    tenv = ROOT.TEnv()
    tenv.ReadFile(out_filename, 0)
    tenv.Print()
    print('Exited normally')
    print('=' * 70)
    print('')
def main():
    """Build one regression config per particle type ('electron', 'photon').

    For each particle a fresh ``Config`` is filled with hard-coded BDT
    options, the regression target, event cuts and the EB/EE input variables.
    The E-p combination settings are filled in for the electron only; for the
    photon ``DoCombine`` is set to ``"False"``.  The selected branches are
    printed, ``Read_branches_from_rootfile`` is called on the input file and
    ``config.Parse()`` is invoked.

    Takes no arguments and returns ``None``.  A missing ntuple directory is
    only reported on stdout; the function deliberately carries on afterwards.
    """

    # Small testing samples -- do NOT use these for plots!
    #   root_file = 'Ntup_Jun22_fullpt_testing_sample.root'
    #   root_file = 'Ntup_Jun22_lowpt_testing_sample.root'

    # Low + high pt sample
    root_file = 'Ntup_Jun22_fullpt_training.root'

    # Only low pt sample
    #   root_file = 'Ntup_Jun22_lowpt_training.root'

    ntup_path = '/data/userdata/rclsa/ElectronTrees/'
    datestr = strftime('%b%d')

    if not os.path.isdir(ntup_path):
        # Best effort: warn only, the configs are still produced below.
        print('Error: "{0}" is not a directory'.format(ntup_path))

    # Full path of the input ntuple; identical for every particle type.
    root_path = os.path.join(ntup_path, root_file)

    for particle in ['electron', 'photon']:

        # Instantiate the Config class which prints a .config file
        config = Config()

        config.Name = 'Config_' + particle + '_' + datestr

        config.InputFiles = root_path
        # Yields 'een_analyzer/ElectronTree' or 'een_analyzer/PhotonTree'
        config.Tree = 'een_analyzer/{0}Tree'.format(particle.capitalize())

        ########################################
        # BDT settings
        ########################################

        config.Options = [
            "MinEvents=200",
            "Shrinkage=0.1",
            "NTrees=1000",
            "MinSignificance=5.0",
            # "EventWeight=max( min(1,exp(-(genPt-50)/50)), 0.1 )", # <-- What to do?
            "EventWeight=1",  # <-- No one really likes the weights
        ]

        config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
        config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
        config.HistoConfig = "jobs/dummy_Histo.config"

        config.CutBase = "eventNumber%2==0"
        config.CutEB = "scIsEB"
        config.CutEE = "!scIsEB"
        config.CutError = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4==3)"

        # Add an additional cut so that the regression is fast
        # NtupIDcut = 10000
        # config.CutBase  += ' && (NtupID<={0})'.format( NtupIDcut )
        # config.CutError += ' && (NtupID<={0})'.format( NtupIDcut )
        # config.CutComb  += ' && (NtupID<={0})'.format( NtupIDcut )

        ########################################
        # Order tree branches
        ########################################

        common_vars = [

            # ======================================
            # Common variables

            # 'pt',            # RCLSA: you cannot use the result of the previous training for the new one
            # 'nVtx',          # rho should be enough information for the BDT
            'scRawEnergy',
            # 'scEta',         # Requires alignment information; use crystal number of the seed instead
            # 'scPhi',         # Requires alignment information; use crystal number of the seed instead
            'scEtaWidth',
            'scPhiWidth',
            # 'scSeedRawEnergy/scRawEnergy',  # RCLSA: Redundant with the one below
            'full5x5_e5x5/scRawEnergy',
            'hadronicOverEm',
            'rhoValue',
            'delEtaSeed',
            'delPhiSeed',

            # ======================================
            # Showershape variables
            #
            # The plain (non-full5x5) shower shapes are not used; the full 5x5
            # versions below replace them:
            #   r9, eHorizontal, eVertical, sigmaIetaIeta, sigmaIetaIphi,
            #   sigmaIphiIphi, e5x5, e3x3, eMax, e2nd, eTop, eBottom, eLeft,
            #   eRight, e2x5Max, e2x5Left, e2x5Right, e2x5Top, e2x5Bottom

            # Normalization to scRawEnergy necessary?
            'full5x5_r9',
            # 'full5x5_eHorizontal',   # RCLSA: Redundant
            # 'full5x5_eVertical',     # RCLSA: Redundant
            'full5x5_sigmaIetaIeta',
            'full5x5_sigmaIetaIphi',
            'full5x5_sigmaIphiIphi',
            # 'full5x5_e5x5',               # RCLSA: Use ratios
            # 'full5x5_e3x3/full5x5_e5x5',  # RCLSA: Redundant, this is R9
            'full5x5_eMax/full5x5_e5x5',
            'full5x5_e2nd/full5x5_e5x5',
            'full5x5_eTop/full5x5_e5x5',
            'full5x5_eBottom/full5x5_e5x5',
            'full5x5_eLeft/full5x5_e5x5',
            'full5x5_eRight/full5x5_e5x5',
            'full5x5_e2x5Max/full5x5_e5x5',
            'full5x5_e2x5Left/full5x5_e5x5',
            'full5x5_e2x5Right/full5x5_e5x5',
            'full5x5_e2x5Top/full5x5_e5x5',
            'full5x5_e2x5Bottom/full5x5_e5x5',

            # ======================================
            # Saturation variables
            'N_SATURATEDXTALS',
            # 'seedIsSaturated',   # RCLSA: probably overkill
            # 'seedCrystalEnergy/scSeedRawEnergy',   # RCLSA: There is only 1/1e6 cases in which the max energy is not the seed

            # ======================================
            # Cluster variables
            'N_ECALClusters',
            # RCLSA: very mismodelled variables, left out:
            #   'clusterMaxDR', 'clusterMaxDRDPhi', 'clusterMaxDRDEta',
            #   'clusterMaxDRRawEnergy'
            'clusterRawEnergy[0]/scRawEnergy',
            'clusterRawEnergy[1]/scRawEnergy',
            'clusterRawEnergy[2]/scRawEnergy',
            'clusterDPhiToSeed[0]',
            'clusterDPhiToSeed[1]',
            'clusterDPhiToSeed[2]',
            'clusterDEtaToSeed[0]',
            'clusterDEtaToSeed[1]',
            'clusterDEtaToSeed[2]',
        ]

        config.VariablesEB = common_vars + [
            # 'cryEtaCoordinate',  # Requires alignment information; use crystal number of the seed instead
            # 'cryPhiCoordinate',  # Requires alignment information; use crystal number of the seed instead
            'iEtaCoordinate',
            'iPhiCoordinate',
            'iEtaMod5',
            'iPhiMod2',
            'iEtaMod20',
            'iPhiMod20',
        ]

        config.VariablesEE = common_vars + [
            # 'cryXCoordinate',  # Requires alignment information; use crystal number of the seed instead
            # 'cryYCoordinate',  # Requires alignment information; use crystal number of the seed instead
            'iXCoordinate',
            'iYCoordinate',
            'scPreshowerEnergy/scRawEnergy',
            'preshowerEnergyPlane1/scRawEnergy',
            'preshowerEnergyPlane2/scRawEnergy',
        ]

        # Single-argument parenthesized print: same output under Python 2's
        # print statement, and also valid as a Python 3 function call.
        print('Using the following branches for EE:')
        print('    ' + '\n    '.join(config.VariablesEE))
        print('Using the following branches for EB:')
        print('    ' + '\n    '.join(config.VariablesEB))

        ########################################
        # Ep combination
        ########################################

        # Only do the combination for the electron
        if particle == 'electron':

            config.DoCombine = "True"

            config.TargetComb = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
            config.CutComb = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4!=3)"

            config.VariablesComb = [
                '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
                'BDTerror/BDTresponse',
                'trkMomentum',
                'trkMomentumRelError',
                'BDTerror/BDTresponse/trkMomentumRelError',
                '( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum',
                ('( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum  *'
                 +
                 'sqrt( BDTerror/BDTresponse*BDTerror/BDTresponse + trkMomentumRelError*trkMomentumRelError)'
                 ),
                'eleEcalDriven',
                'eleTrackerDriven',
                'eleClass',
                'scIsEB',
            ]

        else:
            config.DoCombine = "False"

        ########################################
        # Output
        ########################################

        # Print all branches as a check
        print("\nAll branches in root file:")
        Read_branches_from_rootfile(root_path, config.Tree)

        config.Parse()
# Example #11
# (listing separator left over from extraction; the original trailing line read "0")
def Make_conf(Verbose=True):


    parser = argparse.ArgumentParser()
    parser.add_argument( '--inputrootfile', '-i', type=str, help='Path to root file',
        # default='/afs/cern.ch/work/t/tklijnsm/public/CMSSW_8_0_4/src/NTuples/Ntup_Jul22_fullpt_training.root'
        default='/afs/cern.ch/work/t/tklijnsm/public/CMSSW_8_0_4/src/NTuples/Ntup_Jul22_fullpt_training.root'
        )
    parser.add_argument(
        '--particle', metavar='N', type=str, nargs='+', help='Specify particles',
        default=['electron','photon'],choices=['electron','photon']
        )
    parser.add_argument(
        '--region', metavar='N', type=str, nargs='+', help='Specify regions',
        default=['EB','EE'],choices=['EE','EB']
        )
    parser.add_argument(
        '-n', '--name', type=str, default='NONE', help='Append a string at the end of the name of this config'
        )
    parser.add_argument( '--fast', action='store_true', help='Change some BDT options to be faster (but maybe less precise)')
    args = parser.parse_args()


    # root_file = 'Ntup_Jul15_fullpt_training.root'

    # Small testing samples -- do NOT use these for plots!
    #    root_file = 'Ntup_Jun22_lowpt_testing_sample.root'
    # Low + high pt sample
    # root_file = 'Ntup_Jun22_fullpt_training.root'    
    # Only low pt sample
    # root_file = 'Ntup_Jun22_lowpt_training.root'

    # ------------------------------
    # 22 July samples - Latest set of branches
    # root_file = 'Ntup_Jul22_fullpt_testing_sample.root' # ONLY FOR QUICK TESTS
    root_file = 'Ntup_Jul22_fullpt_training.root'

    # ntup_path = os.path.join( '/data/userdata/rclsa/ElectronTrees/Jul17/' )
    # ntup_path = os.path.join( os.environ['CMSSW_BASE'], 'src/NTuples' )

    if os.environ['HOSTNAME'] == 't3ui17':
        ntup_path = os.path.join( '/mnt/t3nfs01/data01/shome/tklijnsm/Samples/RegressionSamples', '22Jul_samples' )
    else:
        ntup_path = '/afs/cern.ch/work/t/tklijnsm/public/CMSSW_8_0_4/src/NTuples'

    datestr = strftime( '%b%d' )

    if not os.path.isdir( ntup_path ):
        print 'Error: "{0}"" is not a directory'.format( ntup_path )
    physical_path = lambda input_root_file: os.path.join( ntup_path, input_root_file )


    return_configs = []

    # for region in [ 'EB', 'EE' ]:
    for region in args.region:
        for ECAL_AND_TRK in [ False ]:
            # for particle in [ 'electron', 'photon' ]:
            for particle in args.particle:
                if ECAL_AND_TRK and particle=='photon': continue # Photon doesn't have TRK vars

                # Instantiate the Config class which prints a .config file
                config = Config()

                config.Name       = 'Config_' + datestr + '_' + particle + '_' + region


                if ECAL_AND_TRK:
                    config.Name += '_ECALTRK'
                else:
                    config.Name += '_ECALonly'

                if args.name and args.name!='NONE' : config.Name += '_' + args.name


                config.InputFiles = physical_path( root_file )
                config.Tree       = 'een_analyzer/{0}Tree'.format( particle.capitalize() )


                ########################################
                # BDT settings
                ########################################

                # config.Options = [
                #     "MinEvents=200",
                #     "Shrinkage=0.1",
                #     "NTrees=1000",
                #     "MinSignificance=5.0",
                #     "EventWeight=1",
                #     ]

                config.Options = [
                    "MinEvents=300",
                    "Shrinkage=0.15",
                    "NTrees=1000",
                    "MinSignificance=5.0",
                    "EventWeight=1",
                    ]

                # Set the target - be careful to include the tracker energy in the target for the Ep combination
                if ECAL_AND_TRK:
                    config.Target           = "genEnergy * (ECALweight + TRKweight) / ( scRawEnergy*ECALweight + scPreshowerEnergy*ECALweight + trkMomentum*TRKweight )"
                else:
                    config.Target           = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
                # config.Target           = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"

                config.TargetError      = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
                config.HistoConfig      = "jobs/dummy_Histo.config"
                
                config.CutEB            = "scIsEB"
                config.CutEE            = "!scIsEB"

                if region == 'EB':
                    config.DoEB         = "True"
                else:
                    config.DoEB         = "False"


                # ======================================
                # Sample division - need a part for the ECAL-only training, and a part for the combination


                # WITH EP COMBINATION TRAINING IN SERIES, THIS IS NO LONGER NEEDED

                # if particle == 'electron':
                #     # 66% for the main BDT - divide the sample in divideNumber pieces, and use all but one piece for the main BDT
                #     divideNumber            = 3
                #     config.CutBase          = "eventNumber%{0}!=0".format( divideNumber )

                #     # 17% for combination, 17% for error
                #     config.CutComb          = "eventNumber%{0}==0 && eventNumber%{1}==0".format( divideNumber, 2*divideNumber )
                #     config.CutError         = "eventNumber%{0}==0 && eventNumber%{1}!=0".format( divideNumber, 2*divideNumber )
                # elif particle == 'photon':
                #     config.CutBase          = '1.0'
                #     config.CutComb          = '1.0'
                #     config.CutError         = '1.0'


                config.CutBase          = '1.0'
                config.CutComb          = '1.0'
                config.CutError         = '1.0'


                # # TEMPORARY: cut events drastically for test mode
                config.CutBase  += " && NtupID<4000"
                config.CutComb  += " && NtupID<4000"
                config.CutError += " && NtupID<4000"


                ########################################
                # Order tree branches
                ########################################


                # Agreed list on November 23:

                # eval[0]  = raw_energy;
                # eval[1]  = the_sc->etaWidth();
                # eval[2]  = the_sc->phiWidth(); 
                # eval[3]  = full5x5_ess.e5x5/raw_energy;
                # eval[4]  = ele.hcalOverEcalBc();
                # eval[5]  = rhoValue_;
                # eval[6]  = theseed->eta() - the_sc->position().Eta();
                # eval[7]  = reco::deltaPhi( theseed->phi(),the_sc->position().Phi());
                # eval[8]  = full5x5_ess.r9;
                # eval[9]  = full5x5_ess.sigmaIetaIeta;
                # eval[10]  = full5x5_ess.sigmaIetaIphi;
                # eval[11]  = full5x5_ess.sigmaIphiIphi;
                # eval[12]  = full5x5_ess.eMax/full5x5_ess.e5x5;
                # eval[13]  = full5x5_ess.e2nd/full5x5_ess.e5x5;
                # eval[14]  = full5x5_ess.eTop/full5x5_ess.e5x5;
                # eval[15]  = full5x5_ess.eBottom/full5x5_ess.e5x5;
                # eval[16]  = full5x5_ess.eLeft/full5x5_ess.e5x5;
                # eval[17]  = full5x5_ess.eRight/full5x5_ess.e5x5;
                # eval[18]  = EcalClusterToolsT<true>::e2x5Max(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
                # eval[19]  = EcalClusterToolsT<true>::e2x5Left(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
                # eval[20]  = EcalClusterToolsT<true>::e2x5Right(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
                # eval[21]  = EcalClusterToolsT<true>::e2x5Top(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
                # eval[22]  = EcalClusterToolsT<true>::e2x5Bottom(*theseed, &*ecalRecHits, topology_)/full5x5_ess.e5x5;
                # eval[23]  = N_SATURATEDXTALS;
                # eval[24]  = std::max(0,numberOfClusters);
                # eval[25] = clusterRawEnergy[0]/raw_energy;
                # eval[26] = clusterRawEnergy[1]/raw_energy;
                # eval[27] = clusterRawEnergy[2]/raw_energy;
                # eval[28] = clusterDPhiToSeed[0];
                # eval[29] = clusterDPhiToSeed[1];
                # eval[30] = clusterDPhiToSeed[2];
                # eval[31] = clusterDEtaToSeed[0];
                # eval[32] = clusterDEtaToSeed[1];
                # eval[33] = clusterDEtaToSeed[2];

                # eval[34] = ieta;
                # eval[35] = iphi;
                # eval[36] = (ieta-signieta)%5;
                # eval[37] = (iphi-1)%2;
                # eval[38] = (abs(ieta)<=25)*((ieta-signieta)) + (abs(ieta)>25)*((ieta-26*signieta)%20);  
                # eval[39] = (iphi-1)%20;

                # eval[34] = raw_es_energy/raw_energy;
                # eval[35] = the_sc->preshowerEnergyPlane1()/raw_energy;
                # eval[36] = the_sc->preshowerEnergyPlane2()/raw_energy;
                # eval[37] = eeseedid.ix();
                # eval[38] = eeseedid.iy();


                common_vars = [

                    # ======================================
                    # Common variables

                    # 'pt',            # RCLSA: you cannot use the result of the previous training for the new one
                    # 'nVtx',          # rho should be enough information for the BDT
                    # 'scEta',         # Requires alignment information; use crystal number of the seed instead
                    # 'scPhi',         # Requires alignment information; use crystal number of the seed instead
                    #            'scSeedRawEnergy/scRawEnergy',  # RCLSA: Redundant with the one below

                    'scRawEnergy',
                    'scEtaWidth',
                    'scPhiWidth',
                    'full5x5_e5x5/scRawEnergy',
                    'hadronicOverEm',
                    'rhoValue',
                    'delEtaSeed',
                    'delPhiSeed',


                    # ======================================
                    # Showershape variables

                    # Use full 5x5 instead
                    # 'r9',
                    # 'eHorizontal',
                    # 'eVertical',
                    # 'sigmaIetaIeta',
                    # 'sigmaIetaIphi',
                    # 'sigmaIphiIphi',
                    # 'e5x5',
                    # 'e3x3',
                    # 'eMax',
                    # 'e2nd',
                    # 'eTop',
                    # 'eBottom',
                    # 'eLeft',
                    # 'eRight',
                    # 'e2x5Max',
                    # 'e2x5Left',
                    # 'e2x5Right',
                    # 'e2x5Top',
                    # 'e2x5Bottom',

                    # Normalization to scRawEnergy necessary?

                    'full5x5_r9',
                    #            'full5x5_eHorizontal',   # RCLSA: Redundant
                    #            'full5x5_eVertical',     # RCLSA: Redundant
                    'full5x5_sigmaIetaIeta',
                    'full5x5_sigmaIetaIphi',
                    'full5x5_sigmaIphiIphi',
                    # 'full5x5_e5x5',               # RCLSA: Use ratios
                    # 'full5x5_e3x3/full5x5_e5x5',  # RCLSA: Redundant, this is R9
                    'full5x5_eMax/full5x5_e5x5',
                    'full5x5_e2nd/full5x5_e5x5',
                    'full5x5_eTop/full5x5_e5x5',
                    'full5x5_eBottom/full5x5_e5x5',
                    'full5x5_eLeft/full5x5_e5x5',
                    'full5x5_eRight/full5x5_e5x5',
                    'full5x5_e2x5Max/full5x5_e5x5',
                    'full5x5_e2x5Left/full5x5_e5x5',
                    'full5x5_e2x5Right/full5x5_e5x5',
                    'full5x5_e2x5Top/full5x5_e5x5',
                    'full5x5_e2x5Bottom/full5x5_e5x5',


                    # ======================================
                    # Saturation variables

                    'N_SATURATEDXTALS',
                    #            'seedIsSaturated',   # RCLSA: probably overkill
                    #            'seedCrystalEnergy/scSeedRawEnergy',   # RCLSA: There is only 1/1e6 cases in which the max energy is not the seed


                    # ======================================
                    # Cluster variables

                    'N_ECALClusters',
                    #            'clusterMaxDR',          # RCLSA Very mismodelled variables
                    #            'clusterMaxDRDPhi',
                    #            'clusterMaxDRDEta',
                    #            'clusterMaxDRRawEnergy',

                    'clusterRawEnergy[0]/scRawEnergy',
                    'clusterRawEnergy[1]/scRawEnergy',
                    'clusterRawEnergy[2]/scRawEnergy',
                    'clusterDPhiToSeed[0]',
                    'clusterDPhiToSeed[1]',
                    'clusterDPhiToSeed[2]',
                    'clusterDEtaToSeed[0]',
                    'clusterDEtaToSeed[1]',
                    'clusterDEtaToSeed[2]',

                    ]

                if ECAL_AND_TRK:
                    # ADD THE TRK VARIABLES TO THE MAIN BDT
                    # Output should be compared to ECAL-only BDT output
                    common_vars += [

                        # '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
                        # This is simply the corrected energy, I don't we need to pass this (The BDT already has the target in there)

                        # 'BDTerror/BDTresponse',
                        # I guess the error we also don't have to pass -- The BDT will already be conscious of the error

                        'trkMomentumRelError',

                        # Replace this simply by the trkMomentum only
                        # 'trkMomentum/(( scRawEnergy + scPreshowerEnergy )*BDTresponse)',
                        'trkMomentum',

                        'eleEcalDriven',
                        # 'full5x5_r9',
                        'fbrem',
                        'gsfchi2',
                        'gsfndof', 
                        'trkEta',
                        'trkPhi'
                        ]

                config.VariablesEB = common_vars + [
                    # 'cryEtaCoordinate',  # Requires alignment information; use crystal number of the seed instead
                    # 'cryPhiCoordinate',  # Requires alignment information; use crystal number of the seed instead
                    'iEtaCoordinate',
                    'iPhiCoordinate',
                    'iEtaMod5',
                    'iPhiMod2',
                    'iEtaMod20',
                    'iPhiMod20',
                    ]

                config.VariablesEE = common_vars + [
                    # 'cryXCoordinate',  # Requires alignment information; use crystal number of the seed instead
                    # 'cryYCoordinate',  # Requires alignment information; use crystal number of the seed instead
                    'iXCoordinate',
                    'iYCoordinate',
                    'scPreshowerEnergy/scRawEnergy',
                    # 'preshowerEnergyPlane1/scRawEnergy',
                    # 'preshowerEnergyPlane2/scRawEnergy',
                    ]

                if Verbose:
                    print '\n' + '-'*70
                    print 'Making config file ' + config.Name + '.config'
                    print '  Using the following branches for EE:'
                    print '    ' + '\n    '.join( config.VariablesEE )
                    print '  Using the following branches for EB:'
                    print '    ' + '\n    '.join( config.VariablesEB )


                ########################################
                # Ep combination
                ########################################

                # NOVEMBER 25: NO LONGER NECESSARY TO RUN OLD EP COMBO
                config.DoCombine        = "False"


                # # Only do the combination for the electron AND there are no tracking variables
                # if particle == 'electron' and not ECAL_AND_TRK:

                #     config.DoCombine        = "True"

                #     config.TargetComb       = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
                #     config.TargetError      = "1.253*abs(BDTresponse - genEnergy/(scRawEnergy+scPreshowerEnergy))"

                #     config.VariablesComb = [
                #         '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
                #         'BDTerror/BDTresponse',
                #         'trkMomentumRelError',
                #         'trkMomentum/(( scRawEnergy + scPreshowerEnergy )*BDTresponse)',
                #         'eleEcalDriven',
                #         'full5x5_r9',
                #         'fbrem',
                #         'gsfchi2',
                #         'gsfndof', 
                #         'trkEta',
                #         'trkPhi'     # The best way to describe cracks is to use the track (unbiased) directorion
                #        # 'trkMomentum',                                # RCLSA Again, let us choose one absolute scale and the rest be relative
                #        # 'BDTerror/BDTresponse/trkMomentumRelError',   
                #        # ( '( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum  *' +
                #        #   'sqrt( BDTerror/BDTresponse*BDTerror/BDTresponse + trkMomentumRelError*trkMomentumRelError)' ),
                #        # 'eleClass',
                #        # 'scIsEB',
                #         ]
                
                # else:
                #     config.DoCombine        = "False"




                ########################################
                # Output
                ########################################

                # if Verbose:
                #     # Print all branches as a check
                #     print "\nAll branches in root file:"
                #     Read_branches_from_rootfile( physical_path(root_file) , config.Tree )

                config.Parse()

                # # Test if the config file can be read by ROOT TEnv
                # print '\nReading in {0} and trying ROOT.TEnv( ..., 0 ):'.format( out_filename )
                # I_TEnv = ROOT.TEnv()
                # I_TEnv.ReadFile( out_filename, 0 )
                # I_TEnv.Print()
                # print 'Exited normally'
                # print '='*70
                # print

                return_configs.append( config )

    return return_configs
def Make_conf(Verbose=True):
    """Create the regression training configs for the 22 July n-tuple sample.

    One ``Config`` is built for every (region, ECAL_AND_TRK, particle)
    combination enabled by the loops below.  With the current settings only
    ``ECAL_AND_TRK = True`` is iterated and photons are skipped in that mode
    (they have no TRK variables), so one electron config is produced for 'EB'
    and one for 'EE'.  Every config is named ``Config_<date>_<particle>_
    <region>_SAMETGT`` and uses the plain (unweighted) regression target.

    ``config.Parse()`` is called on each config before it is collected;
    according to the comments in this file that is the step that prints the
    .config file.

    Args:
        Verbose: When true, print the config name and the EE/EB branch lists
            of every config that is created.

    Returns:
        List of the ``Config`` objects that were created, in creation order.
    """

    # 22 July samples - latest set of branches.
    # ('Ntup_Jul22_fullpt_testing_sample.root' is available for quick tests
    # only -- do NOT use it for plots.)
    root_file = 'Ntup_Jul22_fullpt_training.root'

    # HOSTNAME is not guaranteed to be exported into the environment; use
    # .get() so that a missing variable selects the default (AFS) location
    # instead of raising KeyError.
    if os.environ.get('HOSTNAME') == 't3ui17':
        ntup_path = os.path.join(
            '/mnt/t3nfs01/data01/shome/tklijnsm/Samples/RegressionSamples',
            '22Jul_samples')
    else:
        ntup_path = '/afs/cern.ch/work/t/tklijnsm/public/CMSSW_8_0_4/src/NTuples'

    # Only warn: the configs are still produced when the directory is missing.
    # (Single-argument print(...) prints the same text under the Python 2
    # print statement used elsewhere in this file.)
    if not os.path.isdir(ntup_path):
        print('Error: "{0}" is not a directory'.format(ntup_path))

    # The input file is the same for every config, so build the path once.
    input_file = os.path.join(ntup_path, root_file)

    datestr = strftime('%b%d')

    return_configs = []

    for region in ['EB', 'EE']:

        # MODIFIED VERSION
        # - Only ECAL_AND_TRK configs made (use [False, True] to also get the
        #   ECAL-only configs back)
        # - Target set to unweighted
        for ECAL_AND_TRK in [True]:

            for particle in ['electron', 'photon']:

                # Photon doesn't have TRK vars
                if ECAL_AND_TRK and particle == 'photon':
                    continue

                # Instantiate the Config class which prints a .config file
                config = Config()

                # MODIFIED: the '_SAMETGT' suffix is forced, replacing the
                # former '_ECALTRK' / '_ECALonly' distinction.
                config.Name = '_'.join(
                    ['Config', datestr, particle, region, 'SAMETGT'])

                config.InputFiles = input_file
                config.Tree = 'een_analyzer/{0}Tree'.format(
                    particle.capitalize())

                ########################################
                # BDT settings
                ########################################

                config.Options = [
                    "MinEvents=200",
                    "Shrinkage=0.1",
                    # NTrees=2000 was tried to include extra tracker effects
                    "NTrees=1000",
                    "MinSignificance=5.0",
                    # Alternative that was considered:
                    #   EventWeight=max( min(1,exp(-(genPt-50)/50)), 0.1 )
                    "EventWeight=1",  # <-- No one really likes the weights
                ]

                # MODIFIED: the normal target is forced for every config.  The
                # ECAL+TRK weighted target
                #   genEnergy * (ECALweight + TRKweight) /
                #   ( scRawEnergy*ECALweight + scPreshowerEnergy*ECALweight
                #     + trkMomentum*TRKweight )
                # is no longer used here.
                config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
                config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
                config.HistoConfig = "jobs/dummy_Histo.config"

                config.CutEB = "scIsEB"
                config.CutEE = "!scIsEB"

                config.DoEB = "True" if region == 'EB' else "False"

                # ======================================
                # Sample division - need a part for the main training, a part
                # for the combination and a part for the error
                #
                # The sample is split on eventNumber modulo divideNumber.
                # With divideNumber = 3 the main BDT gets the events with
                # eventNumber%3 != 0 (2 out of every 3 event numbers); the
                # remaining ones are split on eventNumber%6 between the
                # combination (== 0) and the error (!= 0) training.
                # NOTE: an earlier comment quoted 80% / 10% / 10%, which does
                # not match divideNumber = 3.
                divideNumber = 3
                config.CutBase = "eventNumber%{0}!=0".format(divideNumber)
                config.CutComb = "eventNumber%{0}==0 && eventNumber%{1}==0".format(
                    divideNumber, 2 * divideNumber)
                config.CutError = "eventNumber%{0}==0 && eventNumber%{1}!=0".format(
                    divideNumber, 2 * divideNumber)

                # NOTE(review): presumably restricts the sample size to keep
                # the training fast -- confirm.
                config.CutBase += " && NtupID<5000"
                config.CutComb += " && NtupID<5000"
                config.CutError += " && NtupID<5000"

                ########################################
                # Order tree branches
                ########################################

                common_vars = [

                    # ======================================
                    # Common variables
                    #
                    # Deliberately left out:
                    #   'pt'     RCLSA: you cannot use the result of the
                    #            previous training for the new one
                    #   'nVtx'   rho should be enough information for the BDT
                    #   'scEta', 'scPhi'
                    #            Requires alignment information; use crystal
                    #            number of the seed instead
                    #   'scSeedRawEnergy/scRawEnergy'
                    #            RCLSA: Redundant with full5x5_e5x5/scRawEnergy
                    'scRawEnergy',
                    'scEtaWidth',
                    'scPhiWidth',
                    'full5x5_e5x5/scRawEnergy',
                    'hadronicOverEm',
                    'rhoValue',
                    'delEtaSeed',
                    'delPhiSeed',

                    # ======================================
                    # Showershape variables
                    #
                    # The full 5x5 versions are used instead of the plain
                    # r9 / eHorizontal / eVertical / sigmaI*I* / e5x5 / e3x3 /
                    # eMax / e2nd / eTop / ... / e2x5Bottom branches.
                    # Normalization to scRawEnergy necessary?
                    #
                    # Deliberately left out:
                    #   'full5x5_eHorizontal', 'full5x5_eVertical'
                    #            RCLSA: Redundant
                    #   'full5x5_e5x5'
                    #            RCLSA: Use ratios
                    #   'full5x5_e3x3/full5x5_e5x5'
                    #            RCLSA: Redundant, this is R9
                    'full5x5_r9',
                    'full5x5_sigmaIetaIeta',
                    'full5x5_sigmaIetaIphi',
                    'full5x5_sigmaIphiIphi',
                    'full5x5_eMax/full5x5_e5x5',
                    'full5x5_e2nd/full5x5_e5x5',
                    'full5x5_eTop/full5x5_e5x5',
                    'full5x5_eBottom/full5x5_e5x5',
                    'full5x5_eLeft/full5x5_e5x5',
                    'full5x5_eRight/full5x5_e5x5',
                    'full5x5_e2x5Max/full5x5_e5x5',
                    'full5x5_e2x5Left/full5x5_e5x5',
                    'full5x5_e2x5Right/full5x5_e5x5',
                    'full5x5_e2x5Top/full5x5_e5x5',
                    'full5x5_e2x5Bottom/full5x5_e5x5',

                    # ======================================
                    # Saturation variables
                    #
                    # Deliberately left out:
                    #   'seedIsSaturated'
                    #            RCLSA: probably overkill
                    #   'seedCrystalEnergy/scSeedRawEnergy'
                    #            RCLSA: There is only 1/1e6 cases in which the
                    #            max energy is not the seed
                    'N_SATURATEDXTALS',

                    # ======================================
                    # Cluster variables
                    #
                    # Deliberately left out (RCLSA: very mismodelled):
                    #   'clusterMaxDR', 'clusterMaxDRDPhi',
                    #   'clusterMaxDRDEta', 'clusterMaxDRRawEnergy'
                    'N_ECALClusters',
                    'clusterRawEnergy[0]/scRawEnergy',
                    'clusterRawEnergy[1]/scRawEnergy',
                    'clusterRawEnergy[2]/scRawEnergy',
                    'clusterDPhiToSeed[0]',
                    'clusterDPhiToSeed[1]',
                    'clusterDPhiToSeed[2]',
                    'clusterDEtaToSeed[0]',
                    'clusterDEtaToSeed[1]',
                    'clusterDEtaToSeed[2]',
                ]

                if ECAL_AND_TRK:
                    # ADD THE TRK VARIABLES TO THE MAIN BDT
                    # Output should be compared to ECAL-only BDT output
                    #
                    # Not passed on purpose:
                    #   '( scRawEnergy + scPreshowerEnergy ) * BDTresponse'
                    #       This is simply the corrected energy; the BDT
                    #       already has the target in there.
                    #   'BDTerror/BDTresponse'
                    #       The BDT will already be conscious of the error.
                    #   'trkMomentum/(( scRawEnergy + scPreshowerEnergy )*BDTresponse)'
                    #       Replaced simply by the trkMomentum only.
                    #   'full5x5_r9'
                    #       Already part of the common variables above.
                    common_vars += [
                        'trkMomentumRelError',
                        'trkMomentum',
                        'eleEcalDriven',
                        'fbrem',
                        'gsfchi2',
                        'gsfndof',
                        'trkEta',
                        'trkPhi',
                    ]

                # 'cryEtaCoordinate' / 'cryPhiCoordinate' require alignment
                # information; use crystal number of the seed instead.
                config.VariablesEB = common_vars + [
                    'iEtaCoordinate',
                    'iPhiCoordinate',
                    'iEtaMod5',
                    'iPhiMod2',
                    'iEtaMod20',
                    'iPhiMod20',
                ]

                # 'cryXCoordinate' / 'cryYCoordinate' require alignment
                # information; use crystal number of the seed instead.
                config.VariablesEE = common_vars + [
                    'iXCoordinate',
                    'iYCoordinate',
                    'scPreshowerEnergy/scRawEnergy',
                    'preshowerEnergyPlane1/scRawEnergy',
                    'preshowerEnergyPlane2/scRawEnergy',
                ]

                if Verbose:
                    print('\n' + '-' * 70)
                    print('Making config file ' + config.Name + '.config')
                    print('  Using the following branches for EE:')
                    print('    ' + '\n    '.join(config.VariablesEE))
                    print('  Using the following branches for EB:')
                    print('    ' + '\n    '.join(config.VariablesEB))

                ########################################
                # Ep combination
                ########################################

                # Only do the combination for the electron AND when there are
                # no tracking variables in the main BDT.  With the current
                # loop settings (ECAL_AND_TRK is always True) this branch is
                # never taken and DoCombine ends up "False".
                if particle == 'electron' and not ECAL_AND_TRK:

                    config.DoCombine = "True"

                    config.TargetComb = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
                    # Same expression as the TargetError set above, only
                    # spelled with different whitespace.
                    config.TargetError = "1.253*abs(BDTresponse - genEnergy/(scRawEnergy+scPreshowerEnergy))"

                    # Previously considered but dropped:
                    #   'trkMomentum'  RCLSA: let us choose one absolute scale
                    #                  and the rest be relative
                    #   'BDTerror/BDTresponse/trkMomentumRelError'
                    #   the combined relative E/p uncertainty expression
                    #   'eleClass', 'scIsEB'
                    config.VariablesComb = [
                        '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
                        'BDTerror/BDTresponse',
                        'trkMomentumRelError',
                        'trkMomentum/(( scRawEnergy + scPreshowerEnergy )*BDTresponse)',
                        'eleEcalDriven',
                        'full5x5_r9',
                        'fbrem',
                        'gsfchi2',
                        'gsfndof',
                        # The best way to describe cracks is to use the track
                        # (unbiased) direction
                        'trkEta',
                        'trkPhi',
                    ]

                else:
                    # Not necessary if the TRK vars are in the main BDT
                    config.DoCombine = "False"

                ########################################
                # Output
                ########################################

                config.Parse()

                return_configs.append(config)

    return return_configs
def main():
    """Write the low-pt / full-pt electron and photon regression configs.

    A single ``Config`` object is filled once (the "base config") and then
    re-parsed after changing only what differs between the outputs.  Eight
    configs are parsed in total, in this order:

    * current variable set:  electron lowpt, electron fullpt,
      photon lowpt, photon fullpt
    * '_OLDVARS' variable sets (with the event weight lacking the 0.1 floor):
      electron lowpt, electron fullpt, photon lowpt, photon fullpt

    The Ep combination (``DoCombine``) is switched on for electrons and off
    for photons.  Finally the branches of the low-pt root file are listed via
    ``Read_branches_from_rootfile`` as a check.

    Raises:
        KeyError: if the CMSSW_BASE environment variable is not set.
    """

    # Low + high pt sample
    # (the '..._testing_sample.root' files are small testing samples -- do
    # NOT use these for plots!)
    fullpt_root_file = 'Ntup_Jun22_fullpt_training.root'

    # Only low pt sample
    lowpt_root_file = 'Ntup_Jun22_lowpt_training.root'

    ntup_path = os.path.join(os.environ['CMSSW_BASE'], 'src/NTuples')
    datestr = strftime('%b%d')

    # Only warn: the configs are still written when the directory is missing.
    # (Single-argument print(...) prints the same text under the Python 2
    # print statement used elsewhere in this file.)
    if not os.path.isdir(ntup_path):
        print('Error: "{0}" is not a directory'.format(ntup_path))

    # Fixed processing order of the two pt ranges: low pt first, then full pt.
    pt_samples = [
        ('lowpt', os.path.join(ntup_path, lowpt_root_file)),
        ('fullpt', os.path.join(ntup_path, fullpt_root_file)),
    ]

    ########################################
    # BASE CONFIG
    #   Configs for electrons/photons and for the different pt ranges are
    #   created by altering this one
    ########################################

    # Instantiate the Config class which prints a .config file
    base_config = Config()

    def parse_both_pt_ranges(particle, tree, do_combine, suffix=''):
        # Parse the current state of base_config once per pt range, for the
        # given particle tree and combination flag.
        base_config.Tree = tree
        base_config.DoCombine = do_combine
        for pt_label, input_file in pt_samples:
            base_config.Name = 'Config_{0}_{1}_{2}{3}'.format(
                particle, pt_label, datestr, suffix)
            base_config.InputFiles = input_file
            base_config.Parse()

    ########################################
    # BDT settings
    ########################################

    base_config.Options = [
        "MinEvents=200",
        "Shrinkage=0.1",
        "NTrees=1000",
        "MinSignificance=5.0",
        "EventWeight=max( min(1,exp(-(genPt-50)/50)), 0.1 )",  # <-- What to do?
    ]

    base_config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
    base_config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
    base_config.HistoConfig = "jobs/dummy_Histo.config"

    # Even event numbers go to the main training; the odd ones are shared
    # between the error training (CutError) and the combination (CutComb,
    # set further below).
    base_config.CutBase = "eventNumber%2==0"
    base_config.CutEB = "scIsEB"
    base_config.CutEE = "!scIsEB"
    base_config.CutError = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4==3)"

    # An additional ' && (NtupID<=N)' cut can be appended to CutBase, CutError
    # and CutComb to make the regression fast; it is currently not applied.

    ########################################
    # Order tree branches
    ########################################

    common_vars = [

        # ======================================
        # Common variables
        #
        # Deliberately left out:
        #   'nVtx'           rho should be enough information for the BDT
        #   'scEta', 'scPhi' Requires alignment information; use crystal
        #                    number of the seed instead
        'pt',
        'scRawEnergy',
        'scEtaWidth',
        'scPhiWidth',
        'scSeedRawEnergy/scRawEnergy',
        'hadronicOverEm',
        'rhoValue',
        'delEtaSeed',
        'delPhiSeed',

        # ======================================
        # Showershape variables
        #
        # The full 5x5 versions are used instead of the plain r9 /
        # eHorizontal / eVertical / sigmaI*I* / e5x5 / e3x3 / eMax / e2nd /
        # eTop / ... / e2x5Bottom branches.
        # Normalization to scRawEnergy necessary?
        'full5x5_r9',
        'full5x5_eHorizontal',
        'full5x5_eVertical',
        'full5x5_sigmaIetaIeta',
        'full5x5_sigmaIetaIphi',
        'full5x5_sigmaIphiIphi',
        'full5x5_e5x5',
        'full5x5_e3x3',
        'full5x5_eMax',
        'full5x5_e2nd',
        'full5x5_eTop',
        'full5x5_eBottom',
        'full5x5_eLeft',
        'full5x5_eRight',
        'full5x5_e2x5Max',
        'full5x5_e2x5Left',
        'full5x5_e2x5Right',
        'full5x5_e2x5Top',
        'full5x5_e2x5Bottom',

        # ======================================
        # Saturation variables
        'N_SATURATEDXTALS',
        'seedIsSaturated',
        'seedCrystalEnergy/scRawEnergy',

        # ======================================
        # Cluster variables
        'N_ECALClusters',
        'clusterMaxDR',
        'clusterMaxDRDPhi',
        'clusterMaxDRDEta',
        'clusterMaxDRRawEnergy',
        'clusterRawEnergy[0]/scRawEnergy',
        'clusterRawEnergy[1]/scRawEnergy',
        'clusterRawEnergy[2]/scRawEnergy',
        'clusterDPhiToSeed[0]',
        'clusterDPhiToSeed[1]',
        'clusterDPhiToSeed[2]',
        'clusterDEtaToSeed[0]',
        'clusterDEtaToSeed[1]',
        'clusterDEtaToSeed[2]',
    ]

    # 'cryEtaCoordinate' / 'cryPhiCoordinate' require alignment information;
    # use crystal number of the seed instead.
    base_config.VariablesEB = common_vars + [
        'iEtaCoordinate',
        'iPhiCoordinate',
        'iEtaMod5',
        'iPhiMod2',
        'iEtaMod20',
        'iPhiMod20',
    ]

    # 'cryXCoordinate' / 'cryYCoordinate' require alignment information; use
    # crystal number of the seed instead.
    base_config.VariablesEE = common_vars + [
        'iXCoordinate',
        'iYCoordinate',
        'scPreshowerEnergy/scRawEnergy',
        'preshowerEnergyPlane1/scRawEnergy',
        'preshowerEnergyPlane2/scRawEnergy',
    ]

    ########################################
    # Ep combination
    ########################################

    # These settings stay on the config for all outputs; whether the
    # combination is actually requested is steered per particle through
    # DoCombine (only "True" for the electron).
    base_config.TargetComb = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
    base_config.CutComb = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4!=3)"

    base_config.VariablesComb = [
        '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
        'BDTerror/BDTresponse',
        'trkMomentum',
        'trkMomentumRelError',
        'BDTerror/BDTresponse/trkMomentumRelError',
        '( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum',
        ('( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum  *' +
         'sqrt( BDTerror/BDTresponse*BDTerror/BDTresponse + trkMomentumRelError*trkMomentumRelError)'
         ),
        'eleEcalDriven',
        'eleTrackerDriven',
        'eleClass',
        'scIsEB',
    ]

    ########################################
    # Output
    ########################################

    # lowpt and fullpt electrons - only the root file differs
    parse_both_pt_ranges('electron', 'een_analyzer/ElectronTree', "True")

    # lowpt and fullpt photons - other tree, no Ep combination
    parse_both_pt_ranges('photon', 'een_analyzer/PhotonTree', "False")

    ########################################
    # OLD VARIABLES
    ########################################

    # Remove the max( ..., 0.1, ) from the eventweight
    base_config.Options = [
        "MinEvents=200",
        "Shrinkage=0.1",
        "NTrees=1000",
        "MinSignificance=5.0",
        "EventWeight=min(1,exp(-(genPt-50)/50))",
    ]

    OLD_common_electron_vars = [
        'nVtx',
        'scRawEnergy',
        'scEta',
        'scPhi',
        'scEtaWidth',
        'scPhiWidth',
        'r9',
        'scSeedRawEnergy/scRawEnergy',
        'eMax',
        'e2nd',
        'eHorizontal',  # 'scSeedLeftRightAsym',
        'eVertical',  # 'scSeedTopBottomAsym',
        'sigmaIetaIeta',
        'sigmaIetaIphi',
        'sigmaIphiIphi',
        'N_ECALClusters',
        'clusterMaxDR',
        'clusterMaxDRDPhi',
        'clusterMaxDRDEta',
        'clusterMaxDRRawEnergy/scRawEnergy',
        'clusterRawEnergy[0]/scRawEnergy',
        'clusterRawEnergy[1]/scRawEnergy',
        'clusterRawEnergy[2]/scRawEnergy',
        'clusterDPhiToSeed[0]',
        'clusterDPhiToSeed[1]',
        'clusterDPhiToSeed[2]',
        'clusterDEtaToSeed[0]',
        'clusterDEtaToSeed[1]',
        'clusterDEtaToSeed[2]',
    ]

    # Former branch names, no longer listed: 'scSeedCryEta', 'scSeedCryPhi',
    # 'scSeedCryIetaV2', 'scSeedCryIphiV2'
    base_config.VariablesEB = OLD_common_electron_vars + [
        'cryEtaCoordinate',
        'cryPhiCoordinate',
        'iEtaCoordinate',
        'iPhiCoordinate',
    ]

    # Former branch names, no longer listed: 'scSeedCryIxV2', 'scSeedCryIyV2'
    base_config.VariablesEE = OLD_common_electron_vars + [
        'scPreshowerEnergy/scRawEnergy',
        'iXCoordinate',
        'iYCoordinate',
    ]

    # lowpt and fullpt electrons, old variables
    parse_both_pt_ranges('electron', 'een_analyzer/ElectronTree', "True",
                         suffix='_OLDVARS')

    # Not used for the photon: 'scEta', 'scPhi', 'scSeedLeftRightAsym',
    # 'scSeedTopBottomAsym'
    OLD_common_photon_vars = [
        'nVtx',
        'scRawEnergy',
        'scEtaWidth',
        'scPhiWidth',
        'r9',
        'scSeedRawEnergy/scRawEnergy',
        'sigmaIetaIeta',
        'sigmaIetaIphi',
        'sigmaIphiIphi',
        'N_ECALClusters',
        'hadronicOverEm',
        'rhoValue',
        'delEtaSeed',
        'delPhiSeed',
        'e3x3/e5x5',
        'eMax/e5x5',
        'e2nd/e5x5',
        'eTop/e5x5',
        'eBottom/e5x5',
        'eLeft/e5x5',
        'eRight/e5x5',
        'e2x5Max/e5x5',
        'e2x5Left/e5x5',
        'e2x5Right/e5x5',
        'e2x5Top/e5x5',
        'e2x5Bottom/e5x5',
    ]

    base_config.VariablesEB = OLD_common_photon_vars + [
        'e5x5/scSeedRawEnergy',
        'iEtaCoordinate',
        'iPhiCoordinate',
        'iEtaMod5',
        'iPhiMod2',
        'iEtaMod20',
        'iPhiMod20',
    ]

    base_config.VariablesEE = OLD_common_photon_vars + [
        'scPreshowerEnergy/scRawEnergy',
        'preshowerEnergyPlane1/scRawEnergy',
        'preshowerEnergyPlane2/scRawEnergy',
        'iXCoordinate',
        'iYCoordinate',
    ]

    # lowpt and fullpt photons, old variables
    parse_both_pt_ranges('photon', 'een_analyzer/PhotonTree', "False",
                         suffix='_OLDVARS')

    # Print all branches as a check (base_config.Tree is the photon tree at
    # this point, since the photon configs were parsed last).
    print("\nAll branches in lowpt root file:")
    Read_branches_from_rootfile(
        os.path.join(ntup_path, lowpt_root_file), base_config.Tree)
def MakeConf(Verbose=True):
    """Build one regression config per requested ECAL region from the command line.

    Parses ``sys.argv`` (``--inputrootfile``/``-i``, ``--region``,
    ``--name``/``-n`` and ``--fast``), fills a ``Config`` object for every
    requested region and calls its ``Parse()`` method (presumably the step
    that writes the .config file -- see the ``Config`` class).

    Args:
        Verbose: When true, print the config name and the EE/EB input
            variables of every config that is made.

    Returns:
        A list with one ``Config`` instance per entry given to ``--region``,
        in the order the regions were given.

    Raises:
        SystemExit: Raised by argparse when the command line is invalid,
            which includes a missing ``--inputrootfile``.
    """

    parser = argparse.ArgumentParser()
    # The input file is mandatory: without it os.path.abspath() below would
    # fail on None with an opaque error, so let argparse report the problem.
    parser.add_argument(
        '--inputrootfile', '-i', type=str, required=True,
        help='Path to root file',
        # e.g. '../Config_Oct25_electron_EB_ECALonly_appliedRegression_training_ptWeight.root'
        )
    parser.add_argument(
        '--region', metavar='N', type=str, nargs='+', help='Specify regions',
        default=['EB', 'EE'], choices=['EE', 'EB'],
        )
    parser.add_argument(
        '-n', '--name', type=str, default='NONE',
        help='Append a string at the end of the name of this config',
        )
    parser.add_argument(
        '--fast', action='store_true',
        help='Change some BDT options to be faster (but maybe less precise)',
        )
    args = parser.parse_args()

    datestr = strftime('%b%d')

    # Photon does not have TRK vars
    particle = 'electron'

    # Reads off the name of this .py file, so it's clear what made this.
    # splitext() drops the extension whatever it is; stripping the literal
    # '.py' would leave a stray 'c' when __file__ points at a '.pyc' file.
    moduleName = os.path.splitext(os.path.basename(__file__))[0].replace('MakeConf_', '')

    # Same input file for every region, so resolve it only once
    input_file = os.path.abspath(args.inputrootfile)

    return_configs = []
    for region in args.region:

        # Instantiate the Config class which prints a .config file
        config = Config()

        name_parts = ['Config', datestr, particle, region, moduleName]
        # Append a string to the name if given by the user
        if args.name != 'NONE':
            name_parts.append(args.name)
        if args.fast:
            name_parts.append('FastOptions')
        config.Name = '_'.join(name_parts)

        config.InputFiles = input_file

        # config.Tree = 'een_analyzer/{0}Tree'.format( particle.capitalize() )
        config.Tree = 'een_analyzer/correction'  # <-- May want to change this some time.

        ########################################
        # BDT settings
        ########################################

        if args.fast:
            config.Options = [
                "MinEvents=300",        # Default options use 200
                "Shrinkage=0.2",        # Default options use 0.1
                "NTrees=1000",
                "MinSignificance=5.0",  # Same as in the default options
                "EventWeight=1",
                ]
        else:
            config.Options = [
                "MinEvents=200",
                "Shrinkage=0.1",
                "NTrees=1000",
                "MinSignificance=5.0",
                "EventWeight=1",
                ]

        # Previously: config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
        # The literal is only split over several lines for readability; the
        # pieces concatenate to a single expression string.
        config.Target = (
            "(genEnergy * (trkMomentum*trkMomentum*trkMomentumRelError*trkMomentumRelError"
            " + (scRawEnergy+scPreshowerEnergy)*(scRawEnergy+scPreshowerEnergy)*resolution*resolution)"
            " / ( (scRawEnergy+scPreshowerEnergy)*response*trkMomentum*trkMomentum*trkMomentumRelError*trkMomentumRelError"
            " + trkMomentum*(scRawEnergy+scPreshowerEnergy)*(scRawEnergy+scPreshowerEnergy)*resolution*resolution ))"
            )

        # Probably neither of these are necessary
        config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
        config.HistoConfig = "jobs/dummy_Histo.config"

        config.CutEB = "scIsEB"
        config.CutEE = "!scIsEB"

        # The flag is stored as a string, not as a bool
        config.DoEB = "True" if region == 'EB' else "False"

        # Sample division alternatives that were used before (kept for reference):
        #   divideNumber    = 3
        #   config.CutBase  = "eventNumber%{0}!=0".format( divideNumber )
        #   config.CutComb  = "eventNumber%{0}==0 && eventNumber%{1}==0".format( divideNumber, 2*divideNumber )
        #   config.CutError = "eventNumber%{0}==0 && eventNumber%{1}!=0".format( divideNumber, 2*divideNumber )
        # optionally limited in size with " && NtupID<5000", or simply "NtupID<1000".

        # Pre-selected events have this variable set to 1.0
        config.CutBase = "(ptWeightCut)"
        config.CutComb = "(ptWeightCut)"
        config.CutError = "(ptWeightCut)"

        ########################################
        # Order tree branches
        ########################################

        common_vars = [
            "(scRawEnergy+scPreshowerEnergy)*response",
            "resolution/response",
            "trkMomentumRelError",
            "trkMomentum/((scRawEnergy+scPreshowerEnergy)*response)",
            "eleEcalDriven",
            "fbrem",
            "gsfchi2",
            "gsfndof",
            "trkEta",
            "trkPhi",
            ]

        # No region specific variables at the moment; each config attribute
        # still gets its own copy of the list.
        config.VariablesEB = common_vars + []
        config.VariablesEE = common_vars + []

        if Verbose:
            print('\n' + '-'*70)
            print('Making config file ' + config.Name + '.config')
            print('  Using the following branches for EE:')
            print('    ' + '\n    '.join(config.VariablesEE))
            print('  Using the following branches for EB:')
            print('    ' + '\n    '.join(config.VariablesEB))

        config.DoCombine = "False"
        config.DoErrors = "False"

        ########################################
        # Output
        ########################################

        config.Parse()
        return_configs.append(config)

    return return_configs
def Make_conf(Verbose=True):
    """Build the regression configs for electrons and photons.

    For each of 'electron' and 'photon' a ``Config`` object is filled with the
    input file, tree name, BDT options, cuts and input variables, after which
    its ``Parse()`` method is called (presumably the step that writes the
    .config file -- see the ``Config`` class).  The E-p combination is only
    switched on for the electron config.

    The input ntuple is looked up in ``$CMSSW_BASE/src/NTuples``.

    Args:
        Verbose: When true, print the EE/EB input variables and call
            ``Read_branches_from_rootfile`` on the input file for every config.

    Returns:
        A list with the electron ``Config`` followed by the photon ``Config``.

    Raises:
        KeyError: If the ``CMSSW_BASE`` environment variable is not set.
    """

    # Small testing samples -- do NOT use these for plots!
    root_file = 'Ntup_Jun22_fullpt_testing_sample.root'
    # root_file = 'Ntup_Jun22_lowpt_testing_sample.root'

    # Low + high pt sample
    # root_file = 'Ntup_Jun22_fullpt_training.root'

    # Only low pt sample
    # root_file = 'Ntup_Jun22_lowpt_training.root'

    # ntup_path = os.path.join( '/data/userdata/rclsa/ElectronTrees/' )
    ntup_path = os.path.join(os.environ['CMSSW_BASE'], 'src/NTuples')

    datestr = strftime('%b%d')

    # NOTE(review): a missing directory is only reported, the configs are
    # still produced afterwards -- confirm whether this should abort instead.
    if not os.path.isdir(ntup_path):
        print('Error: "{0}" is not a directory'.format(ntup_path))

    def physical_path(input_root_file):
        """Return the location of `input_root_file` inside the ntuple directory."""
        return os.path.join(ntup_path, input_root_file)

    return_configs = []
    for particle in ['electron', 'photon']:

        # Instantiate the Config class which prints a .config file
        config = Config()

        config.Name = 'Config_' + particle + '_' + datestr

        config.InputFiles = physical_path(root_file)
        # Gives 'een_analyzer/ElectronTree' or 'een_analyzer/PhotonTree'
        config.Tree = 'een_analyzer/{0}Tree'.format(particle.capitalize())

        ########################################
        # BDT settings
        ########################################

        config.Options = [
            "MinEvents=200",
            "Shrinkage=0.1",
            "NTrees=1000",
            "MinSignificance=5.0",
            # "EventWeight=max( min(1,exp(-(genPt-50)/50)), 0.1 )", # <-- What to do?
            "EventWeight=1",  # <-- No one really likes the weights
            ]

        config.Target = "genEnergy / ( scRawEnergy + scPreshowerEnergy )"
        config.TargetError = "1.253*abs( BDTresponse - genEnergy / ( scRawEnergy + scPreshowerEnergy ) )"
        config.HistoConfig = "jobs/dummy_Histo.config"

        # Was: config.CutBase = "eventNumber%2==0"
        # Original remark: "Remove 2/7th of the events (reduces training from 70% to 50%)"
        # NOTE(review): as written the expression selects the even events for
        # which (eventNumber/2)%7 is 0 or 1 -- confirm that this is the intent.
        config.CutBase = "eventNumber%2==0 && ( (eventNumber/2)%7==0 || (eventNumber/2)%7==1 )"

        config.CutEB = "scIsEB"
        config.CutEE = "!scIsEB"
        config.CutError = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4==3)"

        # Add an additional cut so that the regression is fast
        # NtupIDcut = 10000
        # config.CutBase  += ' && (NtupID<={0})'.format( NtupIDcut )
        # config.CutError += ' && (NtupID<={0})'.format( NtupIDcut )
        # config.CutComb  += ' && (NtupID<={0})'.format( NtupIDcut )

        ########################################
        # Order tree branches
        ########################################

        common_vars = [

            # ======================================
            # Common variables

            # 'pt',            # RCLSA: you cannot use the result of the previous training for the new one
            # 'nVtx',          # rho should be enough information for the BDT
            'scRawEnergy',
            # 'scEta',         # Requires alignment information; use crystal number of the seed instead
            # 'scPhi',         # Requires alignment information; use crystal number of the seed instead
            'scEtaWidth',
            'scPhiWidth',
            # 'scSeedRawEnergy/scRawEnergy',  # RCLSA: Redundant with the one below
            'full5x5_e5x5/scRawEnergy',
            'hadronicOverEm',
            'rhoValue',
            'delEtaSeed',
            'delPhiSeed',

            # ======================================
            # Showershape variables

            # The plain (non full5x5) shower shapes are not used; the
            # full 5x5 versions below are used instead:
            #   r9, eHorizontal, eVertical, sigmaIetaIeta, sigmaIetaIphi,
            #   sigmaIphiIphi, e5x5, e3x3, eMax, e2nd, eTop, eBottom, eLeft,
            #   eRight, e2x5Max, e2x5Left, e2x5Right, e2x5Top, e2x5Bottom

            # Normalization to scRawEnergy necessary?

            'full5x5_r9',
            # 'full5x5_eHorizontal',        # RCLSA: Redundant
            # 'full5x5_eVertical',          # RCLSA: Redundant
            'full5x5_sigmaIetaIeta',
            'full5x5_sigmaIetaIphi',
            'full5x5_sigmaIphiIphi',
            # 'full5x5_e5x5',               # RCLSA: Use ratios
            # 'full5x5_e3x3/full5x5_e5x5',  # RCLSA: Redundant, this is R9
            'full5x5_eMax/full5x5_e5x5',
            'full5x5_e2nd/full5x5_e5x5',
            'full5x5_eTop/full5x5_e5x5',
            'full5x5_eBottom/full5x5_e5x5',
            'full5x5_eLeft/full5x5_e5x5',
            'full5x5_eRight/full5x5_e5x5',
            'full5x5_e2x5Max/full5x5_e5x5',
            'full5x5_e2x5Left/full5x5_e5x5',
            'full5x5_e2x5Right/full5x5_e5x5',
            'full5x5_e2x5Top/full5x5_e5x5',
            'full5x5_e2x5Bottom/full5x5_e5x5',

            # ======================================
            # Saturation variables

            'N_SATURATEDXTALS',
            # 'seedIsSaturated',                    # RCLSA: probably overkill
            # 'seedCrystalEnergy/scSeedRawEnergy',  # RCLSA: There is only 1/1e6 cases in which the max energy is not the seed

            # ======================================
            # Cluster variables

            'N_ECALClusters',
            # RCLSA: Very mismodelled variables
            # 'clusterMaxDR',
            # 'clusterMaxDRDPhi',
            # 'clusterMaxDRDEta',
            # 'clusterMaxDRRawEnergy',

            'clusterRawEnergy[0]/scRawEnergy',
            'clusterRawEnergy[1]/scRawEnergy',
            'clusterRawEnergy[2]/scRawEnergy',
            'clusterDPhiToSeed[0]',
            'clusterDPhiToSeed[1]',
            'clusterDPhiToSeed[2]',
            'clusterDEtaToSeed[0]',
            'clusterDEtaToSeed[1]',
            'clusterDEtaToSeed[2]',

            ]

        config.VariablesEB = common_vars + [
            # 'cryEtaCoordinate',  # Requires alignment information; use crystal number of the seed instead
            # 'cryPhiCoordinate',  # Requires alignment information; use crystal number of the seed instead
            'iEtaCoordinate',
            'iPhiCoordinate',
            'iEtaMod5',
            'iPhiMod2',
            'iEtaMod20',
            'iPhiMod20',
            ]

        config.VariablesEE = common_vars + [
            # 'cryXCoordinate',  # Requires alignment information; use crystal number of the seed instead
            # 'cryYCoordinate',  # Requires alignment information; use crystal number of the seed instead
            'iXCoordinate',
            'iYCoordinate',
            'scPreshowerEnergy/scRawEnergy',
            'preshowerEnergyPlane1/scRawEnergy',
            'preshowerEnergyPlane2/scRawEnergy',
            ]

        if Verbose:
            print('Using the following branches for EE:')
            print('    ' + '\n    '.join(config.VariablesEE))
            print('Using the following branches for EB:')
            print('    ' + '\n    '.join(config.VariablesEB))

        ########################################
        # Ep combination
        ########################################

        # Only do the combination for the electron
        if particle == 'electron':

            config.DoCombine = "True"

            config.TargetComb = "( genEnergy - ( scRawEnergy + scPreshowerEnergy )*BDTresponse ) / ( trkMomentum - ( scRawEnergy + scPreshowerEnergy )*BDTresponse )"
            config.CutComb = "(eventNumber%2!=0) && (((eventNumber-1)/2)%4!=3)"

            config.VariablesComb = [
                '( scRawEnergy + scPreshowerEnergy ) * BDTresponse',
                'BDTerror/BDTresponse',
                'trkMomentum',
                'trkMomentumRelError',
                'BDTerror/BDTresponse/trkMomentumRelError',
                '( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum',
                # One single variable, split over two literals for line length
                ('( scRawEnergy + scPreshowerEnergy )*BDTresponse/trkMomentum  *' +
                 'sqrt( BDTerror/BDTresponse*BDTerror/BDTresponse + trkMomentumRelError*trkMomentumRelError)'),
                'eleEcalDriven',
                'eleTrackerDriven',
                'eleClass',
                'scIsEB',
                ]

        else:
            config.DoCombine = "False"

        ########################################
        # Output
        ########################################

        if Verbose:
            # Print all branches as a check
            print("\nAll branches in root file:")
            Read_branches_from_rootfile(physical_path(root_file), config.Tree)

        config.Parse()

        # # Test if the config file can be read by ROOT TEnv
        # print '\nReading in {0} and trying ROOT.TEnv( ..., 0 ):'.format( out_filename )
        # I_TEnv = ROOT.TEnv()
        # I_TEnv.ReadFile( out_filename, 0 )
        # I_TEnv.Print()
        # print 'Exited normally'
        # print '='*70
        # print

        return_configs.append(config)

    return return_configs