Esempio n. 1
0
def summarize(mgr, df_cfg, event_selection, scribblers, max_events=-1,
              components=None):
    '''
        Summarise the data in the tree into the data frames (DFs) given in
        df_cfg.

        :param mgr: HEPPY job manager; its ``run`` method is called with the
                    reader-collector pairs and the component names
        :param df_cfg(list): list of DF definitions
        :param event_selection: pairs of event selections and collectors
        :param scribblers: pairs of scribblers and empty collectors which
                           create new event content
        :param max_events(int): Number of events to process.
                                Default is -1 -> all events.
                                NOTE: this value is accepted but not used
                                anywhere in this function.
        :param components(list): names of the components to run over.
                                 Default is None -> the two components that
                                 used to be hard-coded here.
        :return: the result of ``mgr.run``
    '''

    # Previously this list was hard-coded in the call to mgr.run; it is now
    # only the fallback so callers can choose their own components.
    if components is None:
        components = [
            "TTWJetsToLNu_amcatnloFXFX",
            "ZJetsToNuNu_HT100to200_madgraph_ext1",
        ]

    # concatenation builds a new list, so the caller's lists are not mutated
    reader_collector_pairs = scribblers + event_selection

    # setting up defaults to complete the provided DF configs
    tableConfigCompleter = TableConfigCompleter(
        # using a composer to create a predictable output file name
        # based on the names of the output columns
        createOutFileName=TableFileNameComposer2(default_prefix='tbl_n'))
    # combine configs and completers
    reader_collector_pairs += complete(df_cfg, tableConfigCompleter)

    return mgr.run(reader_collector_pairs, components=components)
def summarize(mgr, df_cfg, event_selection, scribblers, max_events=-1):
    '''
        Assemble every reader-collector pair (scribblers, event selections
        and the completed DF configs) and pass them to the job manager.

        No component restriction is given to ``mgr.run``.

        :param mgr: HEPPY job manager
        :param df_cfg(list): list of DF definitions
        :param event_selection: pairs of event selections and collectors
        :param scribblers: pairs of scribblers and empty collectors which
                           create new event content
        :param max_events(int): Number of events to process.
                                Default is -1 -> all events.
                                NOTE: not used anywhere in this function.
        :return: the result of ``mgr.run``
    '''

    # scribblers come first, followed by the event selections
    pairs = scribblers + event_selection

    # the composer gives output files a predictable name derived from the
    # names of the output columns
    file_name_composer = TableFileNameComposer2(default_prefix='tbl_n')
    completer = TableConfigCompleter(createOutFileName=file_name_composer)

    # fill in the defaults of each DF config and append the resulting pairs
    completed = complete(df_cfg, completer)
    pairs += completed

    return mgr.run(pairs)
Esempio n. 3
0
    def _create_rc_pairs(self):
        '''
            Build one table config per weight (or a single unweighted one)
            from ``self._binning`` and complete them.

            Reads ``self._binning``, ``self._weights`` and
            ``self.output_dir``.

            :return: the result of ``complete`` for the assembled configs
        '''
        # each entry of self._binning contributes one element to each of
        # the four tuples below
        attr_names, column_names, bin_defs, indices = zip(*self._binning)

        # NOTE(review): any() treats an index of 0 as falsy, so indices made
        # up solely of 0/None collapse to None -- confirm this is intended.
        shared_cfg = {
            "keyAttrNames": attr_names,
            "keyOutColumnNames": column_names,
            "binnings": bin_defs,
            "keyIndices": indices if any(indices) else None,
        }

        if self._weights:
            df_configs = {}
            for label, weight in self._weights.items():
                # shallow copy so each weight gets its own config dict
                cfg = dict(shared_cfg)
                # a weight equal to 1 adds no "weight" entry to the config
                if weight != 1:
                    cfg["weight"] = weight
                df_configs[label] = cfg
            # the config names are handed to the file name composer
            composer = WithInsertTableFileNameComposer(
                TableFileNameComposer(), list(df_configs))
        else:
            df_configs = {"unweighted": shared_cfg}
            composer = TableFileNameComposer()

        completer = TableConfigCompleter(createOutFileName=composer,
                                         defaultOutDir=self.output_dir)
        return complete(df_configs.values(), completer)
def summarize(tree, df_cfg, event_selection, scribblers, max_events=-1):
    '''
        Run an alphatwirl event loop over the tree and collect the results
        for the data frames (DFs) defined in df_cfg.

        :param tree(ROOT.TTree): the input tree
        :param df_cfg(list): list of DF definitions
        :param event_selection: pairs of event selections and collectors
        :param scribblers: pairs of scribblers and empty collectors which
                           create new event content
        :param max_events(int): Number of events to process, forwarded as
                                ``maxEvents`` to ``BEvents``.
                                Default is -1 -> all events
        :return: the result of ``CollectorComposite.collect``
    '''

    # scribblers first, then event selections, then the completed DF configs
    pairs = scribblers + event_selection

    # the composer gives output files a predictable name derived from the
    # names of the output columns
    file_name_composer = TableFileNameComposer2(default_prefix='tbl_n')
    completer = TableConfigCompleter(createOutFileName=file_name_composer)
    pairs += complete(df_cfg, completer)

    # gather all readers into one composite and all collectors into another
    readers = alphatwirl.loop.ReaderComposite()
    collectors = alphatwirl.loop.CollectorComposite()
    for single_reader, single_collector in pairs:
        readers.add(single_reader)
        collectors.add(single_collector)

    # the event loop receives a callable that wraps the tree
    def build_events():
        return alphatwirl.roottree.BEvents(tree, maxEvents=max_events)

    # calling the event loop returns the reader that is passed on to collect
    run_loop = alphatwirl.loop.EventLoop(build_events, readers)
    processed_reader = run_loop()

    # a single (dataset, readers) entry with no dataset name
    dataset_readers = ((None, (processed_reader, )), )
    return collectors.collect(dataset_readers)
def summarize(out_dir, mgr, df_cfg, event_selection, scribblers,
              components_df):
    '''
        Summarise the data in the tree into the data frames (DFs) given in
        df_cfg.

        :param out_dir: output directory; passed to ``SaveTree`` and used as
                        the default output directory of the table configs
        :param mgr: NANOAOD job manager
        :param df_cfg(dict): DF definitions; the keys are passed to the
                             output file name composer and the values are
                             the configs to complete
        :param event_selection: pairs of event selections and collectors
        :param scribblers: scribblers which create new event content; each
                           one is paired with a ``NullCollector`` here
        :param components_df: components to summarize; must provide
                              "dataset" and "era" entries (presumably a
                              data frame -- TODO confirm) and is passed on
                              unchanged to ``mgr.run``
        :return: the result of ``mgr.run``
    '''

    reader_collector_pairs = [(s, NullCollector())
                              for s in scribblers] + event_selection

    reader_collector_pairs.append((SaveTree(out_dir), NullCollector()))

    # materialise the keys so the composer gets a real list on Python 3 too
    # (dict.keys() is a view there); on Python 2 this is just a copy
    name_composer = WithInsertTableFileNameComposer(TableFileNameComposer(),
                                                    list(df_cfg.keys()))
    # setting up defaults to complete the provided DF configs
    tableConfigCompleter = TableConfigCompleter(
        # using a composer to create a predictable output file name
        # based on the names of the output columns
        createOutFileName=name_composer,
        defaultOutDir=out_dir)
    # combine configs and completers
    reader_collector_pairs += complete(df_cfg.values(), tableConfigCompleter)

    # Single-argument print(...) calls give the same output on Python 2 and
    # Python 3, unlike the Python 2-only print statement.
    print("Components:")
    for d, e in zip(components_df["dataset"], components_df["era"]):
        print("\t{}_{}".format(d, e))
    # print("") rather than print(): the latter prints "()" on Python 2
    print("")

    return mgr.run(reader_collector_pairs, components=components_df)