def convertTree(inputs, outFolder, name, treeName, indexVars):
    """
    Convert the ROOT files listed in plain-text input files into one dataset.

    Each path in ``inputs`` is opened as a text file; every line containing
    the substring ".root" is collected (unmodified) and handed to a
    ``Dataset`` called ``name`` that is created in ``outFolder`` for the tree
    ``treeName``. All branches ("*") are selected for output and
    ``indexVars`` is used as the output index.

    NOTE(review): another ``convertTree`` with a different signature is
    defined further down; if both live in the same module, the later
    definition replaces this one - confirm which is intended.
    """
    logging.info("Starting conversion")
    checkNcreateFolder(outFolder)
    dataset = Dataset(name, outFolder, treeName)

    # Gather every line mentioning a .root file from all list files.
    files = []
    for listFile in inputs:
        with open(listFile, "r") as handle:
            content = handle.read()
        files.extend(entry for entry in content.split("\n") if ".root" in entry)

    logging.info("Setting files")
    dataset.addFiles(files)

    logging.info("Setting output branches")
    dataset.setOutputBranches("*")

    logging.debug("Setting indexing branches: %s", indexVars)
    dataset.outputIndex = indexVars

    logging.info("Starting processing dataset")
    # Very large event cap - presumably means "process everything";
    # TODO confirm against Dataset.process.
    dataset.process(999999999999999999999)
    logging.info("Finished processing")
def convertTree(config, treeName, category):
    """
    Wrapper for the functionality of preprocessing.dataset

    Creates a single ``Dataset`` in ``config.outputFolder`` whose name is
    ``<outputPrefix>_<sampleName>_<category name>``, configures it from
    ``config`` (sample selection, category selection, optional excluded
    branches, input files, output variables, index variables and an optional
    "sampleRatio" scale factor) and processes up to ``config.maxEvents``.

    Args:
        config: Configuration object providing the attributes read below.
        treeName: Tree name passed on to ``Dataset``.
        category: Key into ``config.categories`` selecting the category.
    """
    logging.info("Starting conversion")
    checkNcreateFolder(config.outputFolder)

    nameHead = (config.outputPrefix, config.sampleName)
    categoryCfg = config.categories[category]
    dataset = Dataset(
        "_".join(nameHead + (categoryCfg.name,)),
        config.outputFolder,
        treeName,
    )

    sampleSelection = config.sampleSelection
    logging.info("Setting sample selection: %s", sampleSelection)
    dataset.sampleSelection = sampleSelection

    categorySelection = categoryCfg.selection
    logging.info("Setting category selection: %s", categorySelection)
    dataset.selection = categorySelection

    # Branch exclusion is optional; None leaves the dataset untouched.
    excluded = config.excludeBranches
    if excluded is not None:
        dataset.ignoreBranches = excluded

    logging.info("Setting files")
    dataset.addFiles(config.files)

    logging.info("Setting output branches")
    dataset.setOutputBranches(config.outputVariables)

    logging.debug("Setting indexing branches: %s", config.indexVariables)
    dataset.outputIndex = config.indexVariables

    if config.addRatio:
        dataset.setSF(config.sampleSF, "sampleRatio")

    logging.info("Starting processing dataset")
    dataset.process(config.maxEvents)
    logging.info("Finished processing")
def convertTreeMulti(config, treeName, category):
    """
    Convert several samples into one combined output.

    For multi mode a ``Dataset`` is generated per entry of ``config.samples``.
    Inside the loop the per-dataset output is disabled
    (``process(..., skipOutput=True)``); at the end the dataframes of all
    samples are concatenated and passed to ``makeOutput`` of a copy of the
    first sample's dataset.

    The first dataset is named
    ``<outputPrefix>_<config.sampleName>_<category name>``; later ones use
    ``config.sampleInfo[sample].name`` in place of ``config.sampleName``.

    Args:
        config: Configuration object providing the attributes read below.
        treeName: Tree name passed on to every ``Dataset``.
        category: Key into ``config.categories`` selecting the category.

    Raises:
        ValueError: If ``config.samples`` is empty. Previously this surfaced
            as an AttributeError on ``None`` after the output folder had
            already been created.
    """
    logging.info("Starting conversion using multi method")

    # Fail fast, before any side effect, when there is nothing to process.
    samples = list(config.samples)
    if not samples:
        raise ValueError(
            "convertTreeMulti needs at least one sample in config.samples"
        )

    checkNcreateFolder(config.outputFolder)

    eventsLeft = config.maxEvents
    dfs = []
    baseDataset = None
    for iSample, sample in enumerate(samples):
        logging.info("Processing sample %s", sample)
        isFirst = iSample == 0

        # The combined output carries the overall sample name, so only the
        # first dataset is named after config.sampleName.
        if isFirst:
            sampleLabel = config.sampleName
        else:
            sampleLabel = config.sampleInfo[sample].name
        datasetName = (
            config.outputPrefix
            + "_"
            + sampleLabel
            + "_"
            + config.categories[category].name
        )
        dataset = Dataset(datasetName, config.outputFolder, treeName)

        logging.info(
            "Setting sample selection: %s", config.sampleInfo[sample].selection
        )
        dataset.sampleSelection = config.sampleInfo[sample].selection

        logging.info(
            "Setting category selection: %s",
            config.categories[category].selection,
        )
        dataset.selection = config.categories[category].selection

        if config.excludeBranches is not None:
            dataset.ignoreBranches = config.excludeBranches

        logging.info("Setting files")
        dataset.addFiles(config.sampleInfo[sample].files)

        logging.info("Setting output branches")
        dataset.setOutputBranches(config.outputVariables)

        logging.debug("Setting indexing branches: %s", config.indexVariables)
        dataset.outputIndex = config.indexVariables

        if config.addRatio:
            dataset.setSF(config.sampleInfo[sample].addSF, "sampleRatio")

        logging.info("Starting processing dataset")
        thisSampleDF = dataset.process(eventsLeft, skipOutput=True)
        # The event budget is shared across samples.
        # NOTE(review): once eventsLeft reaches 0 the remaining samples are
        # still processed with a non-positive limit - confirm how
        # Dataset.process treats that value.
        eventsLeft -= len(thisSampleDF)
        dfs.append(thisSampleDF)

        if isFirst:
            # Keep a copy of the first dataset to write the merged frame.
            baseDataset = copy(dataset)

    baseDataset.makeOutput(pd.concat(dfs))
    logging.info("Finished processing")