def setRepositoryRevisions(self):  # could be inherited from artusWrapper
		# expand possible environment variables in paths
		if isinstance(self._args.repo_scan_base_dirs, basestring):
			self._args.repo_scan_base_dirs = [self._args.repo_scan_base_dirs]
		self._args.repo_scan_base_dirs = [os.path.expandvars(repoScanBaseDir) for repoScanBaseDir in self._args.repo_scan_base_dirs]

		# construct possible scan paths
		subDirWildcards = ["*/" * level for level in range(self._args.repo_scan_depth+1)]
		scanDirWildcards = [os.path.join(repoScanBaseDir, subDirWildcard) for repoScanBaseDir in self._args.repo_scan_base_dirs for subDirWildcard in subDirWildcards]

		# globbing and filter for directories
		scanDirs = tools.flattenList([glob.glob(scanDirWildcard) for scanDirWildcard in scanDirWildcards])
		scanDirs = [scanDir for scanDir in scanDirs if os.path.isdir(scanDir)]

		# key: directory to check type of repository
		# value: command to extract the revision
		repoVersionCommands = {
			".git" : "git rev-parse HEAD",
			".svn" : "svn info"# | grep Revision | awk '{print $2}'"
		}
		# loop over dirs and revision control systems and write revisions to the config dict
		for repoDir, currentRevisionCommand in repoVersionCommands.items():
			repoScanDirs = tools.flattenList([glob.glob(os.path.join(scanDir, repoDir)) for scanDir in scanDirs])
			repoScanDirs = [os.path.abspath(os.path.join(repoScanDir, "..")) for repoScanDir in repoScanDirs]

			for repoScanDir in repoScanDirs:
				popenCout, popenCerr = subprocess.Popen(currentRevisionCommand.split(), stdout=subprocess.PIPE, cwd=repoScanDir).communicate()
				self._config[repoScanDir] = popenCout.replace("\n", "")
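
Every snippet on this page relies on tools.flattenList to turn a list of lists into a single flat list. A minimal stand-in, assuming the helper only flattens one level of nesting (which matches how it is called in all of the examples here):

def flattenList(nested_list):
	# flatten exactly one level of nesting, e.g. [[1, 2], [3], []] -> [1, 2, 3]
	return [item for sublist in nested_list for item in sublist]
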
Example #3
    def prepare_args(self, parser, plotData):
        super(InputRoot, self).prepare_args(parser, plotData)

        self.prepare_list_args(plotData, [
            "nicks", "x_expressions", "y_expressions", "z_expressions",
            "x_bins", "y_bins", "z_bins", "scale_factors", "files",
            "directories", "folders", "weights", "friend_files",
            "friend_folders", "friend_aliases", "tree_draw_options",
            "proxy_prefixes"
        ],
                               help="InputRoot options")
        inputbase.InputBase.prepare_nicks(plotData)

        for key in ["folders"]:
            plotData.plotdict[key] = [
                element.split() if element else [""]
                for element in plotData.plotdict[key]
            ]
        for key in ["friend_files", "friend_folders"]:
            plotData.plotdict[key] = [
                element.split() if element else element
                for element in plotData.plotdict[key]
            ]

        if plotData.plotdict["redo_cache"] is None:
            plotData.plotdict["redo_cache"] = not any(
                tools.flattenList([[
                    input_file.startswith("/") or ("://" in input_file)
                    for input_file in files
                ] for files in plotData.plotdict["files"]]))

        if plotData.plotdict["read_config"]:
            self.read_input_json_dicts(plotData)
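
The redo_cache default above ends up True only when none of the input files is an absolute path or carries a protocol prefix such as "://". A small illustration with made-up file lists:

files_per_nick = [["local_a.root"], ["root://xrd.example.org//store/file.root"]]
redo_cache = not any(tools.flattenList([
    [input_file.startswith("/") or ("://" in input_file) for input_file in files]
    for files in files_per_nick]))
# -> False here, because the second entry is a remote (xrootd-style) path
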
Example #4
	def crossings_graph(x_crossings, y_crossing, x_lims=None):
		x_values = tools.flattenList(zip(*([x_crossings]*3)))
		y_values = [y_crossing, 0.0, y_crossing]*len(x_crossings)
		if x_lims:
			x_values = [x_lims[0]]+x_values+[x_lims[1]]
			y_values = [y_crossing]+y_values+[y_crossing]
		graph = ROOT.TGraph(len(x_values), array.array("d", x_values), array.array("d", y_values))
		graph.SetName("graph_" + hashlib.md5("_".join([str(x_crossings), str(y_crossing)])).hexdigest())
		return graph
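
A hypothetical call (assuming crossings_graph is reachable as a plain function and ROOT is initialised): mark two crossings of the level y = 0.5 between x = 0 and x = 10.

crossings = crossings_graph(x_crossings=[2.0, 7.5], y_crossing=0.5, x_lims=[0.0, 10.0])
crossings.Draw("AL")  # a line at y = 0.5 that dips to 0 at x = 2.0 and x = 7.5
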
Example #7
	def merge_graphs(graphs, allow_reversed=False, allow_shuffle=False):
		"""
		Merge graphs
		"""
		if len(graphs) == 0:
			return None
		
		points = []
		for graph in graphs:
			x_values = graph.GetX()
			y_values = graph.GetY()
			points.append([[x_values[index], y_values[index]] for index in xrange(graph.GetN())])
		
		if allow_shuffle:
			points = geometry.order_lists_for_smallest_distances(points, allow_reversed=allow_reversed)
		
		merged_x_values, merged_y_values = zip(*tools.flattenList(points))
		merged_x_values = array.array("d", merged_x_values)
		merged_y_values = array.array("d", merged_y_values)
		graph = ROOT.TGraph(len(merged_x_values), merged_x_values, merged_y_values)
		return graph
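
A hypothetical usage with two small graphs (no shuffling, so the points are simply concatenated in the given order):

graph1 = ROOT.TGraph(2, array.array("d", [0.0, 1.0]), array.array("d", [0.0, 1.0]))
graph2 = ROOT.TGraph(2, array.array("d", [2.0, 3.0]), array.array("d", [4.0, 9.0]))
merged = merge_graphs([graph1, graph2])
# merged.GetN() == 4
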
Example #9
                    os.path.join(channel_category, quark_type, unc_type)
                    for unc_type in [
                        "nominal", "alpha_s_up", "alpha_s_down", "pdf_up",
                        "pdf_down", "qcd_scale_up", "qcd_scale_down"
                    ]
                ]
                config["output_dir"] = os.path.join(args.output_dir, channel,
                                                    category, quark_type)
                config["filename"] = "energy_distribution"

                if not args.www is None:
                    config["www"] = os.path.join(args.www, channel, category,
                                                 quark_type)

                plot_configs.append(config)

    if log.isEnabledFor(logging.DEBUG):
        import pprint
        pprint.pprint(plot_configs)

    plot_results = higgsplot.HiggsPlotter(list_of_config_dicts=plot_configs,
                                          list_of_args_strings=[args.args],
                                          n_processes=args.n_processes,
                                          n_plots=args.n_plots)
    root_filenames = tools.flattenList(plot_results.output_filenames)
    merged_output = os.path.join(args.output_dir, "energy_distributions.root")
    tools.hadd(target_file=merged_output,
               source_files=root_filenames,
               hadd_args="-f")
    log.info("Merged outputs in " + merged_output)
    # plot best fit values of parameter pol from physics model
    plot_configs = []
    for template in [
            "$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/plots/configs/combine/best_fit_pol_over_channel.json",
            "$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/plots/configs/combine/best_fit_pol_over_channel_tot_stat_unc.json",
            "$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/plots/configs/combine/best_fit_weinberg_angle_over_channel.json",
            "$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/plots/configs/combine/best_fit_weinberg_angle_over_channel_tot_stat_unc.json"
    ]:

        config = jsonTools.JsonDict(os.path.expandvars(template))
        config["directories"] = [
            " ".join(
                set([
                    os.path.dirname(root_file) for root_file in sorted(
                        tools.flattenList(values_tree_files.values()))
                ]))
        ]
        config["x_ticks"] = sorted(values_tree_files.keys())
        inv_annotation_replacements = {
            value: key
            for key, value in annotation_replacements.iteritems()
        }
        config["x_tick_labels"] = [
            inv_annotation_replacements.get(int(value), value)
            for value in sorted(values_tree_files.keys())
        ]
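        # (Illustration with assumed values, not from the original script:
        #  annotation_replacements could map {"combined": 0, "mt": 1, "et": 2};
        #  the inverted dict then turns the sorted numeric keys back into
        #  readable tick labels, e.g. [0, 1, 2] -> ["combined", "mt", "et"].)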
        #config["x_tick_labels"] = ["#scale[1.5]{" + ("" if label == "combined" else "channel_") + label + "}" for label in config["x_tick_labels"]]
        config["x_tick_labels"] = [
            "" + ("" if label == "combined" else "channel_") + label + ""
            for label in config["x_tick_labels"]
                    split_stat_syst_uncs=True,
                    additional_freeze_nuisances=["r"])

            annotation_replacements = {
                channel: index
                for (index, channel) in enumerate(["combined"] + args.channel)
            }
            annotation_replacements.update({
                binid: index + 1
                for (index, binid) in enumerate(
                    sorted(
                        list(
                            set([
                                datacards.configs.category2binid(
                                    category, channel=category[:2]) for
                                category in tools.flattenList(args.categories)
                            ]))))
            })
            if not asimov_polarisation is None:
                annotation_replacements.update({
                    "pol{:04}".format(int(asimov_polarisation * 1000)):
                    asimov_polarisation
                })
            values_tree_files = {}
            if ("channel" in args.combinations) or ("category"
                                                    in args.combinations):
                datacards.annotate_trees(
                    datacards_workspaces, "higgsCombine*.*.mH*.root", ([[
                        os.path.join(
                            os.path.dirname(
                                template.replace("${CHANNEL}", "(.*)").replace(
Example #12
def main():
    parser = argparse.ArgumentParser(
        description=
        "Collect matching trees from input files into one output tree",
        parents=[logger.loggingParser])

    parser.add_argument(
        "-i",
        "--input-dirs",
        help=
        "Input directories = crab project directories containing the subdirectories with crab tasks",
        nargs="+")
    parser.add_argument(
        "-o",
        "--output-dir",
        default=None,
        help=
        "Local output directory. [Default: subdir \"results\" in first input directory]"
    )
    parser.add_argument(
        "-d",
        "--dcache-target",
        default=None,
        help=
        "Directory on dCache (srm) where the files should be copied to. [Default: %(default)s]"
    )

    parser.add_argument(
        "--input-trees",
        nargs="+",
        default=["svfitCache"],
        help="Paths of input SVfit cache trees. [Default: %(default)s]")
    parser.add_argument(
        "--output-tree",
        default="svfitCache",
        help="Name of output SVfit cache tree. [Default: %(default)s]")
    parser.add_argument(
        "--previous-cache",
        default="",
        help=
        "Path to a previous cache which will be merged. [Default: %(default)s]"
    )
    parser.add_argument(
        "--dcache",
        type=bool,
        default=False,
        help="Read&Write from and to desy dcache[Default: %(default)s]")
    parser.add_argument(
        "--no-run",
        default=False,
        action="store_true",
        help="Do not run but only print dict  [Default: %(default)s]")
    parser.add_argument(
        "-n",
        "--n-processes",
        type=int,
        default=1,
        help="Number of (parallel) processes. [Default: %(default)s]")

    args = parser.parse_args()
    logger.initLogger(args)

    if args.output_dir is None:
        args.output_dir = os.path.join(args.input_dirs[0], "results")

    # get paths to crab outputs
    #max_n_jobs = 8000
    #max_n_retrieve = 500
    get_crab_outputs_args = []
    for input_dir in args.input_dirs:
        #for jobid_start in xrange(1, max_n_jobs, max_n_retrieve):
        #	jobid_end = jobid_start + max_n_retrieve - 1
        #	get_crab_outputs_args.append([input_dir, "{jobid_start}-{jobid_end}".format(jobid_start=jobid_start, jobid_end=jobid_end)])
        get_crab_outputs_args.append([input_dir, "1-10"])

    tar_files = tools.parallelize(_get_crab_outputs,
                                  get_crab_outputs_args,
                                  max(args.n_processes, 2),
                                  description="crab getoutput --dump")
    tar_files = tools.flattenList(tar_files)

    # download and un-tar
    download_untar_args = [[tar_file, args.output_dir]
                           for tar_file in tar_files]
    tools.parallelize(_download_untar,
                      download_untar_args,
                      args.n_processes,
                      description="download and un-tar crab outputs")

    root_files = glob.glob(os.path.join(args.output_dir, "*.root"))
    # TODO: maybe add more ROOT files from the -i arguments that did not need to be un-tarred

    root_files_per_sample_nick = {}
    for root_file in root_files:
        basename = os.path.basename(root_file)
        sample_nick = basename[:basename.index("_job_")]
        root_files_per_sample_nick.setdefault(sample_nick,
                                              []).append(root_file)

    merged_output_dir = os.path.join(args.output_dir, "merged")
    if not os.path.exists(merged_output_dir):
        os.makedirs(merged_output_dir)
    merge_outputs_args = [[
        os.path.join(merged_output_dir, sample_nick + ".root"), tmp_root_files,
        "-f"
    ] for sample_nick, tmp_root_files in
                          root_files_per_sample_nick.iteritems()]
    tools.parallelize(_merge_outputs,
                      merge_outputs_args,
                      args.n_processes,
                      description="merging")

    if args.dcache_target:
        dcache_copy_commands = [
            "gfal-copy -v -f -r " + merged_output_dir + " " +
            args.dcache_target
        ]
        tools.parallelize(_call_command,
                          dcache_copy_commands,
                          args.n_processes,
                          description="copying to dCache")

    rm_commands = ["rm " + root_file for root_file in root_files]
    if args.dcache_target:
        rm_commands.extend([
            "rm " + os.path.join(merged_output_dir, sample_nick + ".root")
            for sample_nick in root_files_per_sample_nick.keys()
        ])
    tools.parallelize(_call_command,
                      rm_commands,
                      args.n_processes,
                      description="deleting temporary files")

    log.info("\nJSON configuration for Artus:\n")
    config_output_dir = args.dcache_target if args.dcache_target else merged_output_dir
    for src, dst in filename_replacements.iteritems():
        config_output_dir = config_output_dir.replace(src, dst)
    for sample_nick in sorted(root_files_per_sample_nick.keys()):
        log.info("\"" + sample_nick + "\" : \"" +
                 os.path.join(config_output_dir, sample_nick + ".root") +
                 "\",")
Example #13
        split_stat_syst_uncs=True,
        additional_freeze_nuisances=["r"])

    annotation_replacements = {
        channel: index
        for (index, channel) in enumerate(["combined"] + args.channel)
    }
    annotation_replacements.update({
        binid: index + 1
        for (index, binid) in enumerate(
            sorted(
                list(
                    set([
                        datacards.configs.category2binid(category,
                                                         channel=category[:2])
                        for category in tools.flattenList(args.categories)
                    ]))))
    })
    values_tree_files = {}
    if ("channel" in args.combinations) or ("category" in args.combinations):
        datacards.annotate_trees(
            datacards_workspaces, "higgsCombine*.*.mH*.root", ([[
                os.path.join(
                    os.path.dirname(
                        template.replace("${CHANNEL}", "(.*)").replace(
                            "${MASS}", "\d*")), ".*.root")
                for template in datacard_filename_templates
                if "channel" in template
            ][0]] if "channel" in args.combinations else []) + ([[
                os.path.join(
                    os.path.dirname(
                        config["labels"][index] = os.path.join(
                            "%s_%s" %
                            (channel_renamings.get(channel, channel),
                             category_renamings.get(category, category)),
                            label_renamings.get(label, label))

                    config = uncertainty(config, name).get_config(shift)

                    if "PrintInfos" in config.get("analysis_modules", []):
                        config.get("analysis_modules", []).remove("PrintInfos")

                    harry_configs.append(config)
                    harry_args.append("-d %s --formats png pdf %s" %
                                      (args["input_dir"], args["args"]))

    higgs_plotter = higgsplot.HiggsPlotter(list_of_config_dicts=harry_configs,
                                           list_of_args_strings=harry_args,
                                           n_processes=args["n_processes"],
                                           n_plots=args["n_plots"])

    root_outputs = list(
        set([
            output
            for output in tools.flattenList(higgs_plotter.output_filenames)
            if output.endswith(".root")
        ]))
    command = "hadd -f %s %s" % (args["root_output"], " ".join(root_outputs))
    log.info(command)
    logger.subprocessCall(shlex.split(command))
    log.info("Merged ROOT output is saved to \"%s\"." % args["root_output"])
				for quantity in args["quantities"]:
					
					config["x_expressions"] = quantity
					config["x_bins"] = [channel+"_"+quantity]
					config["x_label"] = channel+"_"+quantity
				
					if not category is None:
						config["output_dir"] = os.path.join(config.setdefault("output_dir", "plots"), category)
			
					for index, label in enumerate(config.setdefault("labels", [])):
						config["labels"][index] = os.path.join("%s_%s" % (channel_renamings.get(channel, channel),
							                                                     category_renamings.get(category, category)),
							                                          label_renamings.get(label, label))
					
					config = uncertainty(config, name).get_config(shift)
					
					if "PrintInfos" in config.get("analysis_modules", []):
						config.get("analysis_modules", []).remove("PrintInfos")
					
					harry_configs.append(config)
					harry_args.append("-d %s --formats png pdf %s" % (args["input_dir"], args["args"]))
			
	higgs_plotter = higgsplot.HiggsPlotter(list_of_config_dicts=harry_configs, list_of_args_strings=harry_args, n_processes=args["n_processes"], n_plots=args["n_plots"])
	
	root_outputs = list(set([output for output in tools.flattenList(higgs_plotter.output_filenames) if output.endswith(".root")]))
	command = "hadd -f %s %s" % (args["root_output"], " ".join(root_outputs))
	log.info(command)
	logger.subprocessCall(shlex.split(command))
	log.info("Merged ROOT output is saved to \"%s\"." % args["root_output"])

			print("###################### annotations ######################")
			if "totstatuncs" in args.steps: # (scaled_lumi is None) and (asimov_polarisation is None):
				datacards.combine(
						datacards_cbs,
						datacards_workspaces,
						None,
						args.n_processes,
						"-M MultiDimFit --algo singles -P pol --redefineSignalPOIs pol "+datacards.stable_options+" -n",
						split_stat_syst_uncs=True,
						additional_freeze_nuisances=["r"]
				)

			print("###################### annotations ######################")
			annotation_replacements = {channel : index for (index, channel) in enumerate(["combined"] + args.channel)}
			annotation_replacements.update({binid : index+1 for (index, binid) in enumerate(sorted(list(set([datacards.configs.category2binid(category, channel=category[:2]) for category in tools.flattenList(args.categories)]))))})
			if not asimov_polarisation is None:
				annotation_replacements.update({"pol{:04}".format(int(asimov_polarisation*1000)) : asimov_polarisation})
			values_tree_files = {}
			if ("channel" in args.combinations) or ("category" in args.combinations):
				datacards.annotate_trees(
						datacards_workspaces,
						"higgsCombine*.*.mH*.root",
						([[os.path.join(os.path.dirname(template.replace("${CHANNEL}", "(.*)").replace("${MASS}", "\d*")), ".*.root") for template in datacard_filename_templates if "channel" in template][0]] if "channel" in args.combinations else [])+
						([[os.path.join(os.path.dirname(template.replace("${BINID}", "(\d*)").replace("${MASS}", "\d*")), ".*.root") for template in datacard_filename_templates if "category" in template][0]] if "category" in args.combinations else [])+
						([os.path.join("/(pol-?\d*)", ".*.root")] if not asimov_polarisation is None else []),
						annotation_replacements,
						args.n_processes,
						values_tree_files,
						"-t limit -b" + (" channel" if "channel" in args.combinations else "") + (" category" if "category" in args.combinations else "") + (" polarisation" if not asimov_polarisation is None else "")
				)
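
The file patterns handed to datacards.annotate_trees above are built by turning the datacard filename templates into regular expressions. A hypothetical illustration with an assumed template value:

template = "${CHANNEL}/${MASS}/datacard.txt"  # assumed example value
channel_pattern = os.path.join(os.path.dirname(template.replace("${CHANNEL}", "(.*)").replace("${MASS}", "\d*")), ".*.root")
# -> "(.*)/\d*/.*.root", matching e.g. "mt/125/higgsCombineTest.Test.mH125.root"
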
							"-M MultiDimFit --algo grid --points 200 -P pol --redefineSignalPOIs pol "+datacards.stable_options+" -n Scan "
					)
	
			if "totstatuncs" in args.steps: # (scaled_lumi is None) and (asimov_polarisation is None):
				datacards.combine(
						datacards_cbs,
						datacards_workspaces,
						None,
						args.n_processes,
						"-M MultiDimFit --algo singles -P pol --redefineSignalPOIs pol "+datacards.stable_options+" -n ",
						split_stat_syst_uncs=True,
						additional_freeze_nuisances=["r"]
				)
	
			annotation_replacements = {channel : index for (index, channel) in enumerate(["combined"] + args.channel)}
			annotation_replacements.update({binid : index+1 for (index, binid) in enumerate(sorted(list(set([datacards.configs.category2binid(category, channel=category[:2]) for category in tools.flattenList(args.categories)]))))})
			if not asimov_polarisation is None:
				annotation_replacements.update({"pol{:04}".format(int(asimov_polarisation*1000)) : asimov_polarisation})
			values_tree_files = {}
			if ("channel" in args.combinations) or ("category" in args.combinations):
				datacards.annotate_trees(
						datacards_workspaces,
						"higgsCombine*.*.mH*.root",
						([[os.path.join(os.path.dirname(template.replace("${CHANNEL}", "(.*)").replace("${MASS}", "\d*")), ".*.root") for template in datacard_filename_templates if "channel" in template][0]] if "channel" in args.combinations else [])+
						([[os.path.join(os.path.dirname(template.replace("${BINID}", "(\d*)").replace("${MASS}", "\d*")), ".*.root") for template in datacard_filename_templates if "category" in template][0]] if "category" in args.combinations else [])+
						([os.path.join("/(pol-?\d*)", ".*.root")] if not asimov_polarisation is None else []),
						annotation_replacements,
						args.n_processes,
						values_tree_files,
						"-t limit -b" + (" channel" if "channel" in args.combinations else "") + (" category" if "category" in args.combinations else "") + (" polarisation" if not asimov_polarisation is None else "")
				)