コード例 #1
0
def test_recombine_eventWise():
    """
    Check ParallelFormJets.recombine_eventWise.

    Covers an eventWise that was never split, a fragmented eventWise
    with one fragment deleted, and a repeat call once a joined file
    already exists in the fragment directory.
    """
    with TempTestDir("tst") as dir_name:
        # make a real eventWise to play with
        ew_path = os.path.join(dir_name, "test.parquet")
        ew = Components.EventWise(ew_path)
        n_events = 12
        ew.append(Event_n=ak.from_iter(np.arange(n_events)))
        # calling it on an eventWise that hasn't been split
        # should return the same eventWise
        found = ParallelFormJets.recombine_eventWise(ew_path)
        assert len(found.columns) == 1
        tst.assert_allclose(found.Event_n, ew.Event_n)
        # fragment the eventWise and delete the last fragment
        paths = ew.fragment("Event_n", n_fragments=3)
        fragment_dir = os.path.dirname(paths[0])
        os.remove(paths[2])
        partial = ParallelFormJets.recombine_eventWise(ew_path)
        # there is no guarantee on which part was removed,
        # only that 2 of the 3 fragments (8 of 12 events) survive
        preserved = [i in partial.Event_n for i in ew.Event_n]
        assert sum(preserved) == 8
        # there should now be a joined component in fragment_dir;
        # any() makes a missing file an assertion failure rather than
        # a StopIteration escaping from next()
        assert any("joined.parquet" in name
                   for name in os.listdir(fragment_dir))
        # repeating the exercise should ignore this joined component
        partial = ParallelFormJets.recombine_eventWise(ew_path)
        preserved = [i in partial.Event_n for i in ew.Event_n]
        assert sum(preserved) == 8
コード例 #2
0
def fake_worker(eventWise_path, run_condition, function, jet_class, jet_name,
                cluster_parameters, batch_size):
    """
    Stand-in worker that records the arguments it was called with.

    It loads the eventWise at eventWise_path, appends a fixed ``Catto``
    column, stores the received arguments as hyperparameters and then
    marks the file as finished under the name of ``function``.
    ``jet_name`` is accepted but not used.
    """
    ew = Components.EventWise.from_file(eventWise_path)
    ew.append(Catto=[2, 3])
    # keep a record of what the caller passed so a test can inspect it
    recorded = dict(Run_condition=run_condition,
                    Jet_class=jet_class.__name__,
                    Cluster_parameters=cluster_parameters,
                    Batch_size=batch_size)
    ew.append_hyperparameters(**recorded)
    ParallelFormJets.mark_finished(eventWise_path, function.__name__)
コード例 #3
0
def end_sequence(eventWise_path, end_time, dijet_mass=None, jet_pt_cut=None):
    """
    Run the fix step, score the clusterings, then correct the masses.

    Parameters
    ----------
    eventWise_path : str
        Path to the dataset, forwarded to every step.
    end_time : int or float
        Time, comparable with time.time(), after which the mass
        correction step is not started.
    dijet_mass : optional
        Forwarded to ParallelFormJets.run_score.
    jet_pt_cut : optional
        Forwarded to ParallelFormJets.run_correct_masses;
        if None the mass correction step is skipped.
    """
    run_fix(eventWise_path, end_time)
    # score clusterings
    ParallelFormJets.run_score(eventWise_path, end_time, dijet_mass)
    if time.time() > end_time:
        return
    # calculate mass peaks; this is the last step, so there is no
    # need to check the time again afterwards
    if jet_pt_cut is not None:
        ParallelFormJets.run_correct_masses(eventWise_path, end_time, jet_pt_cut)
コード例 #4
0
def test_name_generator():
    """Check name_generator yields the unused names of a jet class in order."""
    jet_class = "Horse"
    # should work fine with no existing jets
    gen = ParallelFormJets.name_generator(jet_class, [])
    for expected in ("Horse1Jet", "Horse2Jet"):
        assert next(gen) == expected
    # and should skip names that are already taken by this class,
    # ignoring the names that belong to other classes
    existing_jets = ["Horse1Jet", "Horse3Jet", "Cat2Jet", "Cat0Jet"]
    gen = ParallelFormJets.name_generator(jet_class, existing_jets)
    for expected in ("Horse2Jet", "Horse4Jet"):
        assert next(gen) == expected
コード例 #5
0
def cluster_sequence(file_name,
                     names,
                     classes,
                     params,
                     jet_pt_cut,
                     dijet_mass,
                     pileup_file=None,
                     mean_pileup_per_event=50,
                     z_cuts=(0.2, 0.4)):
    """
    Load an eventWise, optionally add pileup, cluster jets and score them.

    Parameters
    ----------
    file_name : str
        Path of the eventWise file to read.
    names : sequence of str
        Jet names, one per clustering; zipped with classes and params.
    classes : iterable
        Clustering algorithms handed to FormJets.cluster_multiapply.
    params : iterable
        Clustering parameters handed to FormJets.cluster_multiapply.
    jet_pt_cut :
        Forwarded to ParallelFormJets.batch_correct_masses;
        unused when 'top' appears in str(dijet_mass).
    dijet_mass :
        If its string form contains 'top' the semileptonic scoring is
        run, otherwise it is forwarded to CompareClusters.append_scores.
    pileup_file : str, optional
        If given, pileup read from this file is added to the events
        and the jet inputs are made with the "remove pileup" filter.
    mean_pileup_per_event : optional
        Forwarded to AddPileup.add_pileup.
    z_cuts : sequence, optional
        If truthy, forwarded (as a list) to ParallelFormJets.batch_masks
        and followed by ParallelFormJets.batch_filter.

    Returns
    -------
    ew
        The eventWise that all the steps were applied to.
    """
    if pileup_file is not None:
        ew = NonwritingEventWise.from_file(file_name)
        print("Adding pileup")
        ew = remove_excess_data(ew)
        pileup = NonwritingEventWise.from_file(pileup_file)
        ew = AddPileup.add_pileup(ew, pileup, mean_pileup_per_event)
        filter_functions = "remove pileup"
    else:
        ew = Components.EventWise.from_file(file_name)
        filter_functions = "ignore pileup"

    print("Adding jet inputs")
    FormJetInputs.create_jetInputs(ew, filter_functions=filter_functions)

    print("Adding jets")
    for name, cla, param in zip(names, classes, params):
        # keep calling until cluster_multiapply reports it has finished
        while not FormJets.cluster_multiapply(ew, cla, param, name, np.inf):
            pass
    # NOTE(review): 20 looks like an allowance for the number of jet
    # variants derived from each name - confirm against batch_masks
    n_jets = len(names) * 20
    if z_cuts:
        print("Doing z_cuts")
        # hand over a fresh list so the callee can never alter the default
        ParallelFormJets.batch_masks(ew, list(z_cuts), n_jets)
        ParallelFormJets.batch_filter(ew, n_jets)

    print("Calculating scores")
    if 'top' in str(dijet_mass):
        ParallelFormJets.batch_semileptonic(ew, n_jets)
        ParallelFormJets.batch_correct_semileptonic_masses(ew, n_jets)
    else:
        CompareClusters.append_scores(ew,
                                      dijet_mass=dijet_mass,
                                      end_time=np.inf)
        ParallelFormJets.batch_correct_masses(ew, jet_pt_cut, n_jets)
    return ew
コード例 #6
0
def make_minimal(ew, minimal_name, with_jet_content=True):
    """
    Build a new EventWise holding only a wanted subset of ew.

    The subset is every column accepted by
    ParallelFormJets.check_wanted_column, all the hyperparameter
    columns and, if with_jet_content is set, whatever
    get_minimal_jet_content returns for ew.

    If minimal_name contains a path separator it is used as the path
    as given, otherwise it is placed in the directory of ew.
    """
    hyper_names = ew.hyperparameter_columns
    kept = [col for col in ew.columns
            if ParallelFormJets.check_wanted_column(col)]
    contents = {}
    for col in kept + hyper_names:
        contents[col] = getattr(ew, col)
    if with_jet_content:
        extra = get_minimal_jet_content(ew)
        contents.update(extra)
        kept.extend(extra.keys())
    if os.sep not in minimal_name:
        target = os.path.join(ew.dir_name, minimal_name)
    else:
        target = minimal_name
    return Components.EventWise(target, kept, contents, hyper_names)
コード例 #7
0
def test_remove_partial():
    """
    Check ParallelFormJets.remove_partial.

    A jet with fewer entries than there are events should be dropped,
    complete columns should be untouched and a second call should
    change nothing further.
    """
    n_events = 12
    n_unfinished = 6
    unfinished_jet = 'CatJet'
    finished_jet = 'CatJetJet'
    with TempTestDir("tst") as dir_name:
        # calling it on no paths should run fine
        ParallelFormJets.remove_partial([])
        # now make a real eventWise to play with
        ew_path1 = os.path.join(dir_name, "test1.parquet")
        ew = Components.EventWise(ew_path1)
        paths = [ew_path1]
        params = {
            'Event_n': ak.from_iter(np.arange(n_events)),
            'JetInputs_Label': ak.from_iter(np.arange(n_events)),
            unfinished_jet + '_Label': ak.from_iter(
                np.random.rand(n_events - n_unfinished)),
            finished_jet + '_Label': ak.from_iter(np.random.rand(n_events)),
        }
        ew.append(**params)
        # calling it with the wrong total length should yield an error
        with pytest.raises(AssertionError):
            ParallelFormJets.remove_partial(paths, n_events + 1)
        # the first pass with the right length should remove the right jet,
        # the second pass should have no effect
        untouched = ["Event_n", finished_jet + "_Label", "JetInputs_Label"]
        for _ in range(2):
            ParallelFormJets.remove_partial(paths, n_events)
            ew = Components.EventWise.from_file(paths[0])
            for name in untouched:
                tst.assert_allclose(params[name], getattr(ew, name))
            assert unfinished_jet + "_Label" not in ew.columns
コード例 #8
0
def end_sequence(eventWise_path,
                 end_time,
                 z_cuts=None,
                 dijet_mass=None,
                 jet_pt_cut=None):
    """
    Run the post-clustering steps, stopping once end_time has passed.

    The steps, in order, are the modified mass drop, the scoring and
    the mass correction. The clock is checked between steps and the
    function returns early if end_time has been exceeded.

    Parameters
    ----------
    eventWise_path : str
        Path to the dataset used for input and writing outputs.
    end_time : int or float
        Time, comparable with time.time(), at which to stop.
    z_cuts : optional
        Forwarded to ParallelFormJets.run_modified_mass_drop;
        if not given the modified mass drop step is skipped.
    dijet_mass : float, optional
        Mass of the dijets for scoring, forwarded to
        ParallelFormJets.run_score (which is called even if this is None).
    jet_pt_cut : optional
        Forwarded to ParallelFormJets.run_correct_masses;
        if not given the mass correction step is skipped.

    """
    # make pileup free jets
    if z_cuts is not None:
        # NOTE(review): None presumably means every jet in the file,
        # confirm against run_modified_mass_drop
        jet_names = None
        ParallelFormJets.run_modified_mass_drop(eventWise_path, end_time,
                                                z_cuts, jet_names)
    if time.time() > end_time:
        return
    # score clusterings
    ParallelFormJets.run_score(eventWise_path, end_time, dijet_mass)
    if time.time() > end_time:
        return
    # calculate mass peaks; this is the final step so no further
    # check of the clock is needed afterwards
    if jet_pt_cut is not None:
        ParallelFormJets.run_correct_masses(eventWise_path, end_time,
                                            jet_pt_cut)
コード例 #9
0
def test_worker():
    """
    Check that the workers respect their run condition.

    FormJets.cluster_multiapply is replaced by fake_cluster_multiapply,
    whose calls are inspected through fake_clusters.
    """
    # mock multiapply - it has been tested elsewhere
    with unittest.mock.patch('jet_tools.FormJets.cluster_multiapply',
                             new=fake_cluster_multiapply), \
            TempTestDir("tst") as temp_dir:
        jet_name = "GoodJet"
        batch_length = 13
        ew = Components.EventWise(os.path.join(temp_dir, "file.parquet"))
        ew.write()
        eventWise_path = os.path.join(temp_dir, ew.file_name)
        # everything passed to a worker after the path and run condition
        worker_args = (ParallelFormJets.batch_cluster, FormJets.Spectral,
                       jet_name, batch_length)
        # get rid of a continue file if there is one or we will get stuck
        try:
            os.remove('continue')
        except FileNotFoundError:
            pass
        # running with the continue condition should result
        # in instant shutdown
        ParallelFormJets._worker(eventWise_path, 'continue', *worker_args)
        assert len(fake_clusters) == 0
        # running with profiling should still result in a profile being made
        ParallelFormJets.worker(eventWise_path, 'continue', *worker_args)
        assert len(fake_clusters) == 0
        profile_path = eventWise_path.replace('.parquet', '.prof')
        assert os.path.exists(profile_path)
        # now if we run for 10 seconds we expect
        # less than 11 but more than 7 calls
        stop_at = time.time() + 10
        ParallelFormJets._worker(eventWise_path, stop_at, *worker_args)
        n_calls = len(fake_clusters)
        assert n_calls > 7
        assert n_calls < 11
        for fake in fake_clusters:
            assert fake['eventWise'] == ew
            assert fake['cluster_algorithm'] == FormJets.Spectral
            assert fake['batch_length'] == batch_length
            assert len(fake['dict_jet_params']) == 0
コード例 #10
0
def test_make_n_working_fragments():
    """
    Check ParallelFormJets.make_n_working_fragments.

    Covers the error cases, a jet that is already finished, splitting
    an unfinished jet, repeating a split, changing the number of
    fragments and starting from an already joined fragment directory.
    """
    n_events = 12
    n_unfinished = 6
    unfinished_jet = 'DogJet'
    finished_jet = 'CatJet'

    def check_fragments(whole, fragment_paths):
        """Check the fragments jointly hold exactly the unfinished events."""
        # there is no guarantee that the split events will hold the same order
        remaining = list(range(n_events - n_unfinished, n_events))
        for fragment_path in fragment_paths:
            part = Components.EventWise.from_file(fragment_path)
            indices_here = part.JetInputs_Energy.tolist()
            for idx in indices_here:
                remaining.remove(idx)
            flat_here = ak.flatten(whole.CatJet_Energy[indices_here])
            flat_part = ak.flatten(part.CatJet_Energy)
            try:
                flat_here = ak.flatten(flat_here)
                flat_part = ak.flatten(flat_part)
            except ValueError:  # already flat
                pass
            tst.assert_allclose(flat_here.tolist(), flat_part.tolist())
        assert not remaining, "Didn't find all the expected indices"

    with TempTestDir("tst") as dir_name:
        # calling this on a directory that doesn't contain any eventWise
        # objects should raise a file not found error
        with pytest.raises(FileNotFoundError):
            ParallelFormJets.make_n_working_fragments(dir_name, 3, "squibble")
        # trying to split something that isn't an eventWise
        # should raise a NotADirectoryError
        wrong_path = os.path.join(dir_name, "wrong.ods")
        open(wrong_path, 'w').close()  # equivalent of touch
        with pytest.raises(NotADirectoryError):
            ParallelFormJets.make_n_working_fragments(wrong_path, 3, "flob")
        os.remove(wrong_path)
        # now make a real eventWise to play with
        ew_path = os.path.join(dir_name, "test.parquet")
        ew = Components.EventWise(ew_path)
        # try with 12 events, the unfinished jet only covers the first 6
        columns = {}
        columns['Event_n'] = ak.from_iter(np.arange(n_events))
        columns['JetInputs_Energy'] = ak.from_iter(np.arange(n_events))
        columns[unfinished_jet + '_Energy'] = ak.from_iter(
            np.random.rand(n_events - n_unfinished))
        columns[unfinished_jet + '_Food'] = ak.from_iter([
            np.random.rand(np.random.randint(5))
            for _ in range(n_events - n_unfinished)
        ])
        columns[finished_jet + '_Energy'] = ak.from_iter([[
            ak.from_iter(np.random.rand(np.random.randint(5)))
            for _ in range(np.random.randint(5))
        ] for _ in range(n_events)])
        ew.append(**columns)
        # making fragments of the finished jet should result in no change
        paths = ParallelFormJets.make_n_working_fragments(
            ew_path, 3, finished_jet)
        assert isinstance(paths, bool)
        assert paths
        # check nothing else has been made in the dir
        assert len(os.listdir(dir_name)) == 1
        # now split the unfinished jet
        paths = ParallelFormJets.make_n_working_fragments(
            ew_path, 3, unfinished_jet)
        assert len(paths) == 3
        ew.selected_event = None
        check_fragments(ew, paths)
        # if we ask for the same split again it should not do anything,
        # whether we give the file or the fragment directory
        paths2 = ParallelFormJets.make_n_working_fragments(
            ew_path, 3, unfinished_jet)
        assert set(paths2) == set(paths)
        paths2 = ParallelFormJets.make_n_working_fragments(
            os.path.split(paths[0])[0], 3, unfinished_jet)
        assert set(paths2) == set(paths)
        # if we ask for a different number of paths
        # it should recluster and then split
        paths3 = ParallelFormJets.make_n_working_fragments(
            ew_path, 2, unfinished_jet)
        assert len(paths3) == 2
        check_fragments(ew, paths3)
        # remove any existing directories
        for name in os.listdir(dir_name):
            if '.' not in name:
                shutil.rmtree(os.path.join(dir_name, name))
        # try fragmenting and then joining
        ew.fragment("JetInputs_Energy", n_fragments=3)
        fragment_dir = next(
            os.path.join(dir_name, name) for name in os.listdir(dir_name)
            if "fragment" in name)
        Components.EventWise.combine(fragment_dir,
                                     ew.file_name.split('.', 1)[0])
        ParallelFormJets.make_n_working_fragments(ew_path, 4, finished_jet)
        # check it has reconstructed the original eventWise
        found = os.listdir(fragment_dir)
        assert len(found) == 1
        new_ew = Components.EventWise.from_file(
            os.path.join(fragment_dir, found[0]))
        order = np.argsort(new_ew.JetInputs_Energy)
        # only the first events have entries for the unfinished jet
        dog_order = order[order < n_events - n_unfinished]
        tst.assert_allclose(ew.JetInputs_Energy,
                            new_ew.JetInputs_Energy[order])
        tst.assert_allclose(ew.DogJet_Energy, new_ew.DogJet_Energy[dog_order])
        for i, j in enumerate(dog_order):
            tst.assert_allclose(ew.DogJet_Food[i], new_ew.DogJet_Food[j])
        for i, j in enumerate(order):
            for evt, new_evt in zip(ew.CatJet_Energy[i],
                                    new_ew.CatJet_Energy[j]):
                tst.assert_allclose(evt, new_evt)
0
def test_generate_pool():
    """
    Check ParallelFormJets.generate_pool with _worker replaced by fake_worker.

    The pool is run once with an end time, leaving one core free, and
    once with a continue file, using every core. Each time every
    fragment should carry the marks that fake_worker leaves behind.
    """
    def find_fragments(parent_dir):
        """Return the subdirectory of parent_dir and its eventWise files."""
        subdir = next(
            os.path.join(parent_dir, name) for name in os.listdir(parent_dir)
            if ".parquet" not in name)
        paths = [
            os.path.join(subdir, name) for name in os.listdir(subdir)
            if Components.EventWise.pottential_file(name)
        ]
        return subdir, paths

    def check_fake_worker_ran(paths, run_condition, jet_class):
        """Check each file holds what fake_worker writes."""
        for path in paths:
            ew = Components.EventWise.from_file(path)
            tst.assert_allclose(ew.Catto, [2, 3])
            assert ew.Run_condition == run_condition
            assert ew.Jet_class == jet_class.__name__
            assert ew.Cluster_parameters["Bark"] == 3
            assert ew.Batch_size == 500

    # mock _worker, this is tested above
    with unittest.mock.patch('jet_tools.ParallelFormJets._worker',
                             new=fake_worker):
        with TempTestDir("tst") as temp_dir:
            ew = Components.EventWise(os.path.join(temp_dir, "file.parquet"))
            ew.append(JetInputs_Energy=np.arange(100))
            eventWise_path = os.path.join(temp_dir, ew.file_name)
            # get rid of a continue file if there is one or we will get stuck
            try:
                os.remove('continue')
            except FileNotFoundError:
                pass
            end_time = time.time() + 100
            jet_class = FormJets.Spectral
            jet_params = {"Bark": 3}
            n_cores = psutil.cpu_count()
            # kept as a lambda; fake_worker records function.__name__
            function = lambda x: x
            found = ParallelFormJets.generate_pool(
                eventWise_path,
                function,
                function_args=[jet_class, "Bali", jet_params],
                function_kwargs=dict(batch_size=500),
                leave_one_free=True,
                end_time=end_time)
            assert found, "One of the processes crashed"
            # now we expect to find a subdirectory containing n_cores - 1 eventWise
            subdir, paths = find_fragments(temp_dir)
            assert len(paths) == n_cores - 1
            # check the fake worker has been run on all of them
            check_fake_worker_ran(paths, end_time, jet_class)
            # tidy up; use subdir directly rather than the loop variable,
            # which would be undefined if there were no paths
            shutil.rmtree(subdir)
            # it should work fine with a continue file too
            open('continue', 'w').close()
            try:
                found = ParallelFormJets.generate_pool(
                    eventWise_path,
                    function,
                    function_args=[jet_class, "Bali", jet_params],
                    function_kwargs=dict(batch_size=500),
                    leave_one_free=False,
                    end_time=None)
                assert found, "One of the processes crashed"
                # now we expect to find a subdirectory containing n_cores eventWise
                subdir, paths = find_fragments(temp_dir)
                assert len(paths) == n_cores, \
                    f"paths = {len(paths)}, cores = {n_cores}"
                # check the fake worker has been run on all of them
                check_fake_worker_ran(paths, 'continue', jet_class)
            finally:
                # don't leave the continue file in the working directory
                try:
                    os.remove('continue')
                except FileNotFoundError:
                    pass
コード例 #12
0
def test_make_jet_name():
    """Check a float id is written into the jet name as an integer."""
    jet_name = ParallelFormJets.make_jet_name("Dog", 3.)
    assert jet_name == "Dog3Jet"
コード例 #13
0
# Collect the data files in `directory` that have no fragment directory
# holding a "_finished_batch_masks" marker, then process the first of them.
unfinished = []
for name in os.listdir(directory):
    if not Components.EventWise.pottential_file(name):
        continue
    data_file = os.path.join(directory, name)
    # the data file is resolved relative to `directory`, so its fragment
    # directory must be too, or the check runs against the working directory
    fragment_dir = os.path.join(
        directory,
        name.replace(Components.EventWise.FILE_EXTENTION, "_fragment"))
    if not os.path.exists(fragment_dir):
        unfinished.append(data_file)
        continue
    is_finished = any(entry.endswith("_finished_batch_masks")
                      for entry in os.listdir(fragment_dir))
    if not is_finished:
        unfinished.append(data_file)
if not unfinished:
    raise RuntimeError(f"Couldn't find unfinished data in {directory}")
print(f"Found {len(unfinished)} data files to process,")
print(f" starting with {unfinished[0]}")
eventWise_path = unfinished[0]

scan_parameters = None
fix_parameters = None
z_cut = [0.2, 0.4]

jet_class = FormJets.GeneralisedKT
ParallelFormJets.complete_sequence(eventWise_path, jet_class,
                                   end_time, scan_parameters,
                                   fix_parameters=fix_parameters,
                                   dijet_mass=40, z_cuts=z_cut)
コード例 #14
0
ew_shapes = Components.EventWise.from_file("../megaIgnore/IRC_shapes2.parquet")


for n in range(1, 5):
    # nested lists indexed as [order][kinematic variable][jet],
    # each innermost list gains one entry per event
    spectral_kinematics = [[[[] for _ in spectral_jets] for _ in ew_shapes.kinematic_names] for _ in ew_shapes.orders]

    for order in ["nlo", "lo"]:
        o_idx = list(ew_shapes.orders).index(order)
        name = file_name.format(n, order, n)
        print(name)
        dir_name = os.path.split(name)[0]
        if os.path.exists(name):
            ew = Components.EventWise.from_file(name)
        else:
            print("Joining...", end='')
            ew = ParallelFormJets.combine_completed(dir_name, False)
            print("joined.", flush=True)

        print("Getting jet kinematics")
        for j_idx, jname in enumerate(spectral_jets):
            print('.', end='', flush=True)
            ew.selected_event = None
            for event_n in range(len(ew.Event_n)):
                ew.selected_event = event_n
                kinematics = get_event(ew, jname)
                for i, vals in enumerate(kinematics):
                    spectral_kinematics[o_idx][i][j_idx].append(vals)
        print("Saving", flush=True)
        save_name = os.path.join(dir_name, "kinematics.parquet")
        # convert a copy for saving; the nested lists themselves must
        # survive so that the next order can still append to them
        ak.to_parquet(ak.from_iter(spectral_kinematics), save_name)
コード例 #15
0
def run_fix(eventWise_path, end_time):
    """Run batch_fix over eventWise_path in a pool, with no extra arguments."""
    ParallelFormJets.generate_pool(eventWise_path,
                                   batch_fix,
                                   function_args=[],
                                   end_time=end_time)
コード例 #16
0
from jet_tools import ParallelFormJets, FormJets
import os, sys, time, shutil
# Command line arguments, in order:
# jet class name, seconds to run for, order, iteration
jet_class_name = sys.argv[1].strip()
end_time = time.time() + int(sys.argv[2])
order = sys.argv[3].strip()
iteration = sys.argv[4].strip()

jet_class = getattr(FormJets, jet_class_name)

# the jets to copy the parameters from are always taken from iteration 1 at lo
jet_source = f"../megaIgnore/IRCchecks_noPTcut1/iridislo_Scan_{jet_class_name}.awkd"
# copy the eventWise
source = f"../megaIgnore/IRCchecks_noPTcut{iteration}/iridis_pp_to_jjj_{order}{iteration}.awkd"
eventWise_path = f"../megaIgnore/IRCchecks_noPTcut{iteration}/iridis{order}_Scan_{jet_class_name}.awkd"
shutil.copyfile(source, eventWise_path)

# TODO: get the jet params, changing the combination method.
# An unfinished `jet_params =` assignment used to sit here; it was a
# syntax error and the name was never used, so it has been removed.
ParallelFormJets.scan_score(eventWise_path, jet_class, end_time, jet_source)