def test_no_reported_statistics():
    """Run a query under log capture and require that no statistics line shows up.

    The captured log must not contain the text
    'TFileAccessTracer INFO Sending'.
    """
    with LogCapture() as captured_log:
        as_pandas(
            f_single.Select(
                lambda e: e.Jets("AntiKt4EMTopoJets")
                .First()
                .pt() / 1000.0
            )
        )
        log_text = str(captured_log)
        assert 'TFileAccessTracer INFO Sending' not in log_text
def test_flatten_array():
    """Flatten the jets of all events into a single column.

    Both query steps are given as *strings* holding lambda source text rather
    than as Python lambda objects.
    """
    jet_pts = as_pandas(
        f_single
        .SelectMany('lambda e: e.Jets("AntiKt4EMTopoJets")')
        .Select('lambda j: j.pt()/1000.0')
    )

    # First row is pinned to a known reference value.
    first_pt = jet_pts.iloc[0]['col1']  # type: ignore
    assert abs(first_pt - 52.02462890625) < 0.001  # type: ignore

    # Consecutive rows must differ, i.e. the column is not one repeated value.
    second_pt = jet_pts.iloc[1]['col1']  # type: ignore
    assert int(first_pt) != int(second_pt)  # type: ignore
def test_event_info_includes():
    """Fetch the run number through EventInfo and expect 10 result rows.

    The stated intent of this test is to make sure event info pulls in the
    correct includes.
    """
    expected_rows = 10
    run_numbers = as_pandas(
        f_single
        .Select(lambda e: e.EventInfo("EventInfo"))
        .Select(lambda e: e.runNumber())
    )
    print(run_numbers)
    assert len(run_numbers) == expected_rows
def test_first_object_in_event():
    """Make sure First() puts its if statement in the right place."""
    leading_jet = as_pandas(
        f_single.Select(
            lambda e: e.Jets("AntiKt4EMTopoJets")
            .First()
            .pt() / 1000.0
        )
    )
    first_value = leading_jet.iloc[0]['col1']  # type: ignore
    assert int(first_value) == 52  # type: ignore
def test_single_column_output():
    """Exercise the power operator on a flattened, single-column result."""
    pt_squared = as_pandas(
        f_single
        .SelectMany(lambda e: e.Jets("AntiKt4EMTopoJets"))
        .Select(lambda j: j.pt() ** 2)
    )
    print(pt_squared)

    # First row is pinned to a known reference value.
    first_value = pt_squared.iloc[0]['col1']  # type: ignore
    assert int(first_value) == 2706562012  # type: ignore

    # Consecutive rows must differ.
    second_value = pt_squared.iloc[1]['col1']  # type: ignore
    assert int(first_value) != int(second_value)  # type: ignore
def test_simple_dict_output():
    """Flatten the jets and return a dict so the output column is named 'JetPt'.

    Also exercises the binary division operator.
    """
    jets = as_pandas(
        f_single
        .SelectMany(lambda e: e.Jets("AntiKt4EMTopoJets"))
        .Select(lambda j: {
            'JetPt': j.pt() / 1000.0
        })
    )
    print(jets)

    # First row is pinned to a known reference value.
    first_pt = jets.iloc[0]['JetPt']  # type: ignore
    assert abs(first_pt - 52.02462890625) < 0.001  # type: ignore

    # Consecutive rows must differ.
    second_pt = jets.iloc[1]['JetPt']  # type: ignore
    assert int(first_pt) != int(second_pt)  # type: ignore
def test_first_object_in_event_with_where():
    """Make sure First() puts its if statement in the right place after a Where."""
    leading_jet = as_pandas(
        f_single.Select(
            lambda e: e.Jets("AntiKt4EMTopoJets")
            .Select(lambda j: j.pt() / 1000.0)
            .Where(lambda jpt: jpt > 10.0)
            .First()
        )
    )
    first_value = leading_jet.iloc[0]['col1']  # type: ignore
    assert int(first_value) == 52  # type: ignore
    # The result is expected to hold exactly 10 rows.
    assert len(leading_jet) == 10
def test_select_first_of_array():
    """Take First() of a sequence whose elements are themselves sequences.

    The hard part is that First() here does not return a single item but an
    array, which then has to be aggregated over (Count()).
    """
    track_counts = as_pandas(
        f_single.Select(
            lambda e: e.Jets("AntiKt4EMTopoJets")
            .Select(lambda _: e.Tracks("InDetTrackParticles"))
            .First()
            .Count()
        )
    )

    # (row position, expected count), checked in this order.
    expected_by_row = ((0, 394), (1, 387), (-1, 381))
    for row, expected in expected_by_row:
        assert track_counts.iloc[row]['col1'] == expected
def test_not_in_where():
    """Use a negated condition inside a Where that sits below a First().

    This is a little tricky because the First() is actually running over one
    jet in the event. Further, the Where on the tracks puts us another level
    down. So it is easy to produce code that compiles, but with the First's
    if statement very much in the wrong place.
    """
    soft_track_counts = as_pandas(
        f_single.Select(
            lambda e: e.Jets("AntiKt4EMTopoJets")
            .Select(
                lambda j: e.Tracks("InDetTrackParticles")
                .Where(lambda t: not (t.pt() > 1000.0))
            )
            .First()
            .Count()
        )
    )
    first_count = soft_track_counts.iloc[0]['col1']
    assert first_count == 258
def test_md_job_options():
    """Run object corrections as we go, via job-option scripts added with MetaData.

    Three 'add_job_script' metadata blocks are attached to the query, chained
    through 'depends_on' (sys_error_tool -> pileup_tool -> jet_corrections).
    The query then reads jets from the "AnalysisJets_NOSYS" container.
    """
    sys_error_tool = {
        'metadata_type': 'add_job_script',
        'name': 'sys_error_tool',
        'script': [
            "# Set up the systematics loader/handler service:",
            "from AnaAlgorithm.DualUseConfig import createService",
            "from AnaAlgorithm.AlgSequence import AlgSequence",
            "calibrationAlgSeq = AlgSequence()",
            "sysService = createService( 'CP::SystematicsSvc', 'SystematicsSvc', sequence = calibrationAlgSeq )",
            "sysService.sigmaRecommended = 1",
            "# Add sequence to job",
        ],
    }

    pileup_tool = {
        'metadata_type': 'add_job_script',
        'name': 'pileup_tool',
        'script': [
            "from AsgAnalysisAlgorithms.PileupAnalysisSequence import makePileupAnalysisSequence",
            "pileupSequence = makePileupAnalysisSequence( 'mc' )",
            "pileupSequence.configure( inputName = 'EventInfo', outputName = 'EventInfo_%SYS%' )",
            "# print( pileupSequence ) # For debugging",
            "calibrationAlgSeq += pileupSequence",
        ],
        'depends_on': ['sys_error_tool'],
    }

    jet_corrections = {
        'metadata_type': 'add_job_script',
        'name': 'jet_corrections',
        'script': [
            "jetContainer = 'AntiKt4EMTopoJets'",
            "from JetAnalysisAlgorithms.JetAnalysisSequence import makeJetAnalysisSequence",
            "jetSequence = makeJetAnalysisSequence( 'mc', jetContainer, enableCutflow=True, enableKinematicHistograms=True )",
            "jetSequence.configure( inputName = jetContainer, outputName = 'AnalysisJetsBase_%SYS%' )",
            "calibrationAlgSeq += jetSequence",
            "# print( jetSequence ) # For debugging",
            "",
            "# Include, and then set up the jet analysis algorithm sequence:",
            "from JetAnalysisAlgorithms.JetJvtAnalysisSequence import makeJetJvtAnalysisSequence",
            "jvtSequence = makeJetJvtAnalysisSequence( 'mc', jetContainer, enableCutflow=True )",
            "jvtSequence.configure( inputName = { 'eventInfo' : 'EventInfo_%SYS%',",
            " 'jets' : 'AnalysisJetsBase_%SYS%' },",
            " outputName = { 'jets' : 'AnalysisJets_%SYS%' },",
            " affectingSystematics = { 'jets' : jetSequence.affectingSystematics() } )",
            "calibrationAlgSeq += jvtSequence",
            "# print( jvtSequence ) # For debugging",
            "calibrationAlgSeq.addSelfToJob( job )",  # TODO: Can I do this earlier?
            "print(job) # for debugging",
        ],
        'depends_on': ['pileup_tool'],
    }

    # TODO: get the sequence above running (add to the job)
    # TODO: Make sure we correct the proper container, and see if the jet
    #       energy changes as expected.
    corrected_jets = as_pandas(
        f_single
        .MetaData(sys_error_tool)
        .MetaData(pileup_tool)
        .MetaData(jet_corrections)
        .SelectMany(lambda e: e.Jets("AnalysisJets_NOSYS"))
        .Select(lambda j: {
            'JetPt': j.pt() / 1000.0
        })
    )
    print(corrected_jets)

    # First row is pinned to a known reference value.
    first_pt = corrected_jets.iloc[0]['JetPt']  # type: ignore
    assert abs(first_pt - 50.10308984375) < 0.001  # type: ignore

    # Consecutive rows must differ.
    second_pt = corrected_jets.iloc[1]['JetPt']  # type: ignore
    assert int(first_pt) != int(second_pt)  # type: ignore
def test_truth_particles():
    """Count the entries of the 'TruthParticles' collection; the first row must be 1450."""
    truth_counts = as_pandas(
        f_single.Select(
            lambda e: e.TruthParticles('TruthParticles').Count()
        )
    )
    first_count = truth_counts.iloc[0]['col1']
    assert first_count == 1450