Example #1
0
def createRandomizedFile(input_file_name):
    """Write a copy of a ROOT file in which each tree's entries are reordered.

    For every key returned by ``getFileKeys`` the tree is processed in two
    passes:

    1. Every entry is filled into one of ``numberOfFileSplittings(tree)``
       temporary trees, chosen at random, stored in
       ``splitFileName(input_file_name)``.
    2. Each temporary tree is read back with ``arrays()``, reordered with
       the indices returned by ``randomIndices`` and filled into a single
       output tree in ``randomizedFileName(input_file_name)``.

    The temporary split file is removed once a tree has been processed.

    Args:
        input_file_name: Path of the ROOT file to randomize.
    """
    split_path = splitFileName(input_file_name)
    randomized_path = randomizedFileName(input_file_name)

    f = uproot.open(input_file_name)
    for tree_index, input_tree_name in enumerate(getFileKeys(f)):

        tree = f[input_tree_name]

        # collection of branches in the input tree and their information
        branch_collection = BranchInfoCollection(tree)

        # first pass: scatter the entries over several temporary trees
        number_of_splittings = numberOfFileSplittings(tree)
        split_file = TFile(split_path, 'RECREATE')
        split_trees = []
        for i in range(number_of_splittings):
            split_name = input_tree_name + str(i)
            t = TTree(split_name, split_name)
            branch_collection.addBranches(t)
            split_trees.append(t)

        for array_map in TreeIterator(tree):

            # NOTE(review): len(array_map) is used as the number of entries
            # in this chunk -- confirm against TreeIterator's return type.
            chunk_size = len(array_map)

            # pick a random temporary tree for every entry of the chunk
            random_file_choices = np.random.choice(
                np.arange(number_of_splittings), chunk_size)
            for j in range(chunk_size):
                branch_collection.fillArrays(array_map, j)
                split_trees[random_file_choices[j]].Fill()

        for t in split_trees:
            t.Write()
        split_file.Close()

        # read the temporary file back in
        split_file = uproot.open(split_path)
        split_trees = [split_file[key] for key in getFileKeys(split_file)]

        # Only the first tree may (re)create the output file. Re-creating it
        # for every tree would truncate the file and drop the trees that
        # were already written, so later trees are appended in update mode.
        open_mode = 'RECREATE' if tree_index == 0 else 'UPDATE'
        randomized_file = TFile(randomized_path, open_mode)
        randomized_tree = TTree(input_tree_name, input_tree_name)
        branch_collection.addBranches(randomized_tree)

        for t in split_trees:

            # load the arrays of this temporary tree, keyed by decoded name
            loaded_arrays = {
                key.decode('utf-8'): value
                for key, value in t.arrays().items()
            }

            # apply the same random index order to every branch array
            size = len(t)
            random_indices = randomIndices(size)

            for key in loaded_arrays:
                loaded_arrays[key] = loaded_arrays[key][random_indices]

            for j in range(size):
                branch_collection.fillArrays(loaded_arrays, j)
                randomized_tree.Fill()

        # write the randomized tree to the output file
        randomized_tree.Write()
        randomized_file.Close()

        # clean up the temporary split file
        os.remove(split_path)