Beispiel #1
0
class DstDataTask(DataTask):
    """Data task that exposes one ROOT file as a local target.

    Declares the ``release``, ``prod`` and ``database`` parameters in
    addition to whatever ``DataTask`` already declares.
    """
    release = b2luigi.Parameter()
    prod = b2luigi.IntParameter()
    database = b2luigi.IntParameter()

    def output(self):
        """Yield a dict mapping ``"full_output.root"`` to its local target.

        The file location is obtained by passing this task to
        ``_build_data_path``.
        """
        data_path = _build_data_path(self)
        root_target = b2luigi.LocalTarget(data_path)
        yield {"full_output.root": root_target}
Beispiel #2
0
class Basf2PathTask(Basf2Task):
    """Task that builds a basf2 path and processes it.

    Subclasses provide the path by overriding ``create_path``; ``process``
    then appends a progress module, prints the path and runs it.

    Parameters:
        num_processes (int): if non-zero, passed to ``basf2.set_nprocesses``.
            Not significant, default 0.
        max_event (int): if non-zero, used as the number-of-events override
            of the Belle2 environment. Not significant, default 0.
    """
    num_processes = b2luigi.IntParameter(significant=False, default=0)
    max_event = b2luigi.IntParameter(significant=False, default=0)

    def create_path(self):
        """Return the basf2 path to process. Must be overridden.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()

    @b2luigi.on_temporary_files
    def process(self):
        """Build the path from ``create_path`` and run it through basf2.

        Raises:
            AssertionError: if the current basf2 git hash differs from
                ``self.git_hash``.
            ImportError: if ``basf2`` or ``ROOT`` cannot be imported.
        """
        current_git_hash = get_basf2_git_hash()
        assert current_git_hash == self.git_hash, (
            f"basf2 git hash mismatch: the environment provides "
            f"{current_git_hash!r} but the task expects {self.git_hash!r}")

        try:
            import basf2
            import ROOT
        except ImportError as import_error:
            # Chain the original error so the real cause (which module is
            # missing and why) stays visible in the traceback.
            raise ImportError(
                "Can not find ROOT or basf2. Can not use the basf2 task."
            ) from import_error

        # A value of 0 (the default) is falsy and leaves the setting untouched.
        if self.num_processes:
            basf2.set_nprocesses(self.num_processes)

        if self.max_event:
            ROOT.Belle2.Environment.Instance().setNumberEventsOverride(
                self.max_event)

        path = self.create_path()

        path.add_module("Progress")
        basf2.print_path(path)
        basf2.process(path)

        print(basf2.statistics)
Beispiel #3
0
class SimulationTask(Basf2PathTask):
    """Generate and simulate events, writing them to a ROOT output file.

    Parameters:
        n_events (int): passed to ``modularAnalysis.setupEventInfo``.
        event_type (SimulationType): selects the decay file handed to the
            EvtGen generator.

    Output:
        simulation_full_output.root
    """
    n_events = luigi.IntParameter()
    event_type = luigi.EnumParameter(enum=SimulationType)

    def create_path(self):
        """Build the generation + simulation path for ``event_type``.

        Raises:
            ValueError: if ``event_type`` is neither ``SimulationType.y4s``
                nor ``SimulationType.continuum``.
        """
        main_path = basf2.create_path()
        modularAnalysis.setupEventInfo(self.n_events, main_path)

        # Pick the decay file for the requested event type first, then
        # resolve it through the Belle2 file system in one place.
        if self.event_type == SimulationType.y4s:
            relative_dec_file = (
                'analysis/examples/tutorials/B2A101-Y4SEventGeneration.dec')
        elif self.event_type == SimulationType.continuum:
            relative_dec_file = (
                'analysis/examples/simulations/B2A102-ccbarEventGeneration.dec')
        else:
            raise ValueError(
                f"Event type {self.event_type} is not valid. It should be either 'Y(4S)' or 'Continuum'!"
            )
        dec_file = Belle2.FileSystem.findFile(relative_dec_file)

        generators.add_evtgen_generator(main_path, 'signal', dec_file)
        modularAnalysis.loadGearbox(main_path)
        simulation.add_simulation(main_path)

        output_file_name = self.get_output_file_name(
            'simulation_full_output.root')
        main_path.add_module('RootOutput', outputFileName=output_file_name)

        return main_path

    def output(self):
        """Yield the target for ``simulation_full_output.root``."""
        yield self.add_to_output("simulation_full_output.root")
Beispiel #4
0
class FullTimingTask(luigi.Task):
    """Time a tsfresh run with all calculators, as a comparison baseline.

    ``extract_features`` is called without a feature selection and the
    wall-clock duration is stored in ``result.json``.

    NOTE(review): ``self.num_ids`` is read in ``run`` but not declared in
    this class; presumably it is provided by a decorator or requirement
    that is not visible here -- confirm.
    """
    n_jobs = luigi.IntParameter()

    def output(self):
        """Yield the target for ``result.json``."""
        yield self.add_to_output("result.json")

    def run(self):
        """Read ``data.csv``, time the extraction and dump the result."""
        data_target = self._get_input_targets("data.csv")[0]

        with data_target.open("r") as csv_file:
            frame = pd.read_csv(csv_file)

        # Only the feature extraction itself is timed, not the CSV parsing.
        started_at = time()
        extract_features(
            frame,
            column_id="id",
            column_sort="time",
            n_jobs=self.n_jobs,
            disable_progressbar=True,
        )
        finished_at = time()

        # Number of rows carrying id 0, used as the series length.
        rows_of_first_id = int((frame["id"] == 0).sum())

        timing_result = {
            "time": finished_at - started_at,
            "n_ids": self.num_ids,
            "n_jobs": self.n_jobs,
            "time_series_length": rows_of_first_id,
        }

        with self._get_output_target("result.json").open("w") as json_file:
            json.dump(timing_result, json_file)
Beispiel #5
0
class Resample(b2luigi.Task):
    """Draw a resampled copy of the train sample and write it to a root file.

    Parameters:
        ntuple_file (str): Path to the file
        train_size (float): between 0 and 1, size of train sample
        test_size (float): size of test sample,
        random_seed (int): random seed to generate a resampled sample

    Output:
        train.root

    NOTE(review): only ``random_seed`` is declared in this class. The other
    parameters and ``tree_name`` (used in ``run``) are presumably supplied
    by a decorator or requirement that is not visible here -- confirm.
    """
    random_seed = b2luigi.IntParameter()
    queue = "sx"

    def output(self):
        """Yield the target for ``train.root``."""
        yield self.add_to_output("train.root")

    def run(self):
        """Read the input ``train.root`` files, resample, and store the result."""
        input_files = self.get_input_file_names('train.root')
        train_df = root_pandas.read_root(*input_files, key=self.tree_name)

        # The seed makes the resampled sample reproducible per task.
        bootstrap_df = resample(train_df, random_state=self.random_seed)

        output_file = self.get_output_file_name('train.root')
        root_pandas.to_root(bootstrap_df, output_file, key=self.tree_name)
Beispiel #6
0
class MyNumberTask(b2luigi.Task):
    """Write one random number to ``output_file.txt``."""
    some_parameter = b2luigi.IntParameter()

    def output(self):
        """Yield the target for ``output_file.txt``."""
        yield self.add_to_output("output_file.txt")

    def run(self):
        """Draw a number from ``random.random`` and write it as one line."""
        value = random.random()

        output_path = self.get_output_file_name("output_file.txt")
        with open(output_path, "w") as out_file:
            out_file.write(f"{value}\n")
Beispiel #7
0
class MyNumberTask(b2luigi.Task):
    """Write one random number to a results file named after the parameter."""
    some_parameter = b2luigi.IntParameter()

    def output(self):
        """Return the local target ``results/output_file_<some_parameter>.txt``."""
        target_path = f"results/output_file_{self.some_parameter}.txt"
        return b2luigi.LocalTarget(target_path)

    def run(self):
        """Draw a number from ``random.random`` and write it to the output."""
        value = random.random()
        target = self.output()
        with target.open("w") as out_file:
            out_file.write(f"{value}\n")
Beispiel #8
0
class TaskA(luigi.Task):
    """Write 1000 Gaussian draws around ``central_value`` to a text file.

    Output:
        random_numbers.txt
    """
    central_value = luigi.FloatParameter()
    index = luigi.IntParameter()

    def run(self):
        """Write one draw per line to ``random_numbers.txt``."""
        output_path = self.get_output_file_name("random_numbers.txt")

        # NOTE(review): sigma is 0.0, so every draw equals central_value.
        # This looks unintended for "random numbers" -- confirm the spread.
        with open(output_path, "w") as numbers_file:
            numbers_file.writelines(
                f"{random.gauss(self.central_value, 0.0)}\n"
                for _ in range(1000)
            )

    def output(self):
        """Yield the target for ``random_numbers.txt``."""
        yield self.add_to_output("random_numbers.txt")
Beispiel #9
0
class DataCreationTask(luigi.Task):
    """Generate random time series data for testing and store it as CSV.

    Parameters:
        num_ids (int): number of distinct ids (default 100).
        time_series_length (int): number of rows generated per id.
        random_seed (int): seed passed to ``np.random.seed``.

    Output:
        data.csv
    """
    num_ids = luigi.IntParameter(default=100)
    time_series_length = luigi.IntParameter()
    random_seed = luigi.IntParameter()

    def output(self):
        """Yield the target for ``data.csv``."""
        yield self.add_to_output("data.csv")

    def _series_frame(self, series_id):
        """Return the frame for one id: constant id, running time, random value."""
        length = self.time_series_length
        return pd.DataFrame({
            "id": [series_id] * length,
            "time": range(length),
            "value": np.random.randn(length),
        })

    def run(self):
        """Seed numpy, build one frame per id, and write them as one CSV."""
        np.random.seed(self.random_seed)

        # Frames are built in ascending id order so the random draws are
        # consumed in the same sequence for a given seed.
        per_id_frames = [
            self._series_frame(series_id)
            for series_id in range(self.num_ids)
        ]
        all_series = pd.concat(per_id_frames)

        with self._get_output_target("data.csv").open("w") as csv_file:
            all_series.to_csv(csv_file)
Beispiel #10
0
        class TaskB(b2luigi.Task):
            """Task that requires ten clones of ``TaskA`` and outputs ``out.dat``."""
            another_parameter = b2luigi.IntParameter()

            def requires(self):
                """Yield ``TaskA`` clones with ``some_other_parameter`` 0..9."""
                yield from (
                    self.clone(TaskA, some_other_parameter=parameter_value)
                    for parameter_value in range(10)
                )

            def run(self):
                """Placeholder: the merge step is intentionally left empty."""
                # somehow merge the output of TaskA to create "out.dat"
                pass

            def output(self):
                """Yield the target for ``out.dat``."""
                yield self.add_to_output("out.dat")
Beispiel #11
0
class TimingTask(luigi.Task):
    """Time a tsfresh run restricted to the given feature parameters.

    Parameters:
        feature_parameter (dict): passed to ``extract_features`` as
            ``default_fc_parameters``; its first key is reported as the
            feature name.
        n_jobs (int): passed to ``extract_features``.
        try_number (int): copied into the result.

    Output:
        result.json

    NOTE(review): ``self.num_ids`` is read in ``run`` but not declared in
    this class; presumably it is provided by a decorator or requirement
    that is not visible here -- confirm.
    """
    feature_parameter = luigi.DictParameter(hashed=True)
    n_jobs = luigi.IntParameter()
    try_number = luigi.IntParameter()

    def output(self):
        """Yield the target for ``result.json``."""
        yield self.add_to_output("result.json")

    def run(self):
        """Read ``data.csv``, time the extraction and dump the result."""
        data_target = self._get_input_targets("data.csv")[0]

        with data_target.open("r") as csv_file:
            frame = pd.read_csv(csv_file)

        # Only the feature extraction itself is timed, not the CSV parsing.
        started_at = time()
        extract_features(
            frame,
            column_id="id",
            column_sort="time",
            n_jobs=self.n_jobs,
            default_fc_parameters=self.feature_parameter,
            disable_progressbar=True,
        )
        finished_at = time()

        # Only the first entry of the feature dict is reported.
        feature_name = list(self.feature_parameter)[0]
        feature_settings = self.feature_parameter[feature_name]
        if feature_settings:
            number_of_settings = len(feature_settings)
        else:
            number_of_settings = 0

        # Number of rows carrying id 0, used as the series length.
        rows_of_first_id = int((frame["id"] == 0).sum())

        timing_result = {
            "time": finished_at - started_at,
            "n_ids": self.num_ids,
            "n_jobs": self.n_jobs,
            "feature": feature_name,
            "number_parameters": number_of_settings,
            "time_series_length": rows_of_first_id,
            "try_number": self.try_number,
        }

        with self._get_output_target("result.json").open("w") as json_file:
            json.dump(timing_result, json_file)
Beispiel #12
0
class MyNumberTask(b2luigi.Task):
    """Write one random number to a file; fails when ``some_parameter`` is 3.

    ``htcondor_settings`` carries the per-task HTCondor resource request.
    """
    some_parameter = b2luigi.IntParameter()

    htcondor_settings = {"request_cpus": 1, "request_memory": "100 MB"}

    def output(self):
        """Yield the target for ``output_file.txt``."""
        yield self.add_to_output("output_file.txt")

    def run(self):
        """Print a start message, draw a number and write it to the output.

        Raises:
            ValueError: if ``some_parameter`` equals 3.
        """
        print("I am now starting a task")
        value = random.random()

        # Deliberate failure path for one specific parameter value.
        if self.some_parameter == 3:
            raise ValueError

        output_path = self.get_output_file_name("output_file.txt")
        with open(output_path, "w") as out_file:
            out_file.write(f"{value}\n")
Beispiel #13
0
class AnalysisTask(Basf2PathTask):
    """Analysis task configured for the gbasf2 batch system.

    Output:
        D_ntuple.root
        B_ntuple.root
    """
    # Selects the gbasf2 wrapper batch process for this task.
    batch_system = "gbasf2"
    # Prefix of the gbasf2 project name used for grid submission; b2luigi
    # appends a hash derived from the luigi parameters to make it unique.
    gbasf2_project_name_prefix = b2luigi.Parameter()
    gbasf2_input_dataset = b2luigi.Parameter(hashed=True)
    # Example cut parameter, so that several projects can be started for
    # different cut values.
    mbc_lower_cut = b2luigi.IntParameter()

    def create_path(self):
        """Return the analysis path for the range (``mbc_lower_cut``, 5.3)."""
        lower_and_upper_mbc = (self.mbc_lower_cut, 5.3)
        analysis_path = example_mdst_analysis.create_analysis_path(
            d_ntuple_filename="D_ntuple.root",
            b_ntuple_filename="B_ntuple.root",
            mbc_range=lower_and_upper_mbc,
        )
        return analysis_path

    def output(self):
        """Yield the targets for the D and B ntuple files, in that order."""
        for ntuple_name in ("D_ntuple.root", "B_ntuple.root"):
            yield self.add_to_output(ntuple_name)
Beispiel #14
0
class BootstrapTraining(b2luigi.Task):
    """Run a training on a resampled train sample. See also `Training`.

    Parameters:
        random_seed (int): random seed of the resampled train sample
        off_res_files (list): List with paths to off-res. files
        tree_name (str): name of the tree in the root file
        training_variables (list): list of training variables used for training
        training_parameters (dict): train- and test size,
            the following BDT hyper-parameters (optional): "nTrees",
            "shrinkage" and "nLevels".

    Output:
        bdt.xml
    """
    random_seed = b2luigi.IntParameter()
    off_res_files = b2luigi.ListParameter(hashed=True)
    # NOTE(review): documented as a str above but declared as a
    # ListParameter -- confirm which one is intended.
    tree_name = b2luigi.ListParameter()
    training_variables = b2luigi.ListParameter(hashed=True)
    training_parameters = b2luigi.DictParameter(hashed=True)

    def requires(self):
        """Yield one ``Resample`` clone per off-resonance file."""
        # Settings shared by every clone; only the ntuple file varies.
        shared_settings = {
            "train_size": self.training_parameters["train_size"],
            "test_size": self.training_parameters["test_size"],
            "random_seed": self.random_seed,
        }

        yield from (
            self.clone(Resample, ntuple_file=ntuple_file, **shared_settings)
            for ntuple_file in self.off_res_files
        )

    def output(self):
        """Yield the target for ``bdt.xml``."""
        yield self.add_to_output('bdt.xml')

    def run(self):
        """Delegate to ``Training.run``, called with this task as ``self``."""
        Training.run(self)
Beispiel #15
0
class DataTask(b2luigi.ExternalTask):
    """External task that only declares the parameters describing a data file.

    No ``output`` or ``run`` is defined here; subclasses are expected to
    supply the output (presumably built from these parameters -- confirm).
    """
    # One of the members of the ``DataMode`` enum.
    data_mode = b2luigi.EnumParameter(enum=DataMode)
    experiment_number = b2luigi.IntParameter()
    run_number = b2luigi.IntParameter()
    prefix = b2luigi.Parameter()
    file_name = b2luigi.Parameter()
Beispiel #16
0
class AggregatorTask(Basf2nTupleMergeTask):
    """Merge task that requires one ``AnalysisTask`` per simulation type."""
    n_events = luigi.IntParameter()

    def requires(self):
        """Yield an ``AnalysisTask`` clone for every ``SimulationType`` member."""
        yield from (
            self.clone(AnalysisTask, event_type=simulation_type)
            for simulation_type in SimulationType
        )
Beispiel #17
0
        class TaskB(b2luigi.Task):
            """Task with one integer parameter and a single output, ``out.dat``."""
            another_parameter = b2luigi.IntParameter()

            def output(self):
                """Yield the target for ``out.dat``."""
                yield self.add_to_output("out.dat")
Beispiel #18
0
        class TaskA(b2luigi.Task):
            """Task with one integer parameter and two outputs."""
            some_parameter = b2luigi.IntParameter()

            def output(self):
                """Yield the targets for ``file_a`` and ``file_b``, in that order."""
                for output_name in ("file_a", "file_b"):
                    yield self.add_to_output(output_name)
Beispiel #19
0
        class TaskA(b2luigi.Task):
            """Task with two integer parameters and a single output, ``test.txt``."""
            some_parameter = b2luigi.IntParameter()
            some_other_parameter = b2luigi.IntParameter()

            def output(self):
                """Yield the target for ``test.txt``."""
                yield self.add_to_output("test.txt")