def _create_weight_configs(self):
    """Extract different weight configurations from `self._config`.

    Builds every combination of per-dataset weights from
    `self._weight_dict`, substitutes the common weights defined in
    `self._common_weights` (in all their value combinations), and appends
    the resolved (dataset, weight-string) lists to `self._weight_configs`.
    """
    # One candidate list of (dataset name, weight expression) per dataset.
    weights_list = [[(name, w) for w in self._weight_dict[name]]
                    for name in self._weight_dict]
    # One candidate list of (common-weight name, value) per common weight.
    common_weights_list = [[(name, w) for w in self._common_weights[name]]
                           for name in self._common_weights]

    # Every combination of common-weight values, as name -> value dicts.
    common_weights_dicts = [dict(combo)
                            for combo in itertools.product(*common_weights_list)]

    weight_configs = []
    for weight_conf in itertools.product(*weights_list):
        count_before = len(weight_configs)
        for common_weight in common_weights_dicts:
            # Substitute each common-weight name by its chosen value in
            # every dataset's weight expression.
            replaced = []
            for dataset in weight_conf:
                expression = dataset[1]
                for weight_name in common_weight:
                    expression = mps_tools.replace_factors(
                        expression, weight_name, common_weight[weight_name])
                replaced.append((dataset[0], expression))
            replaced = tuple(replaced)
            if replaced not in weight_configs:
                weight_configs.append(replaced)

        # default if config contains no common weights:
        if len(weight_configs) == count_before:
            weight_configs.append(weight_conf)

    # Evaluate the product strings and store the final configurations.
    for weight_config in weight_configs:
        self._weight_configs.append(
            [(dataset[0], mps_tools.compute_product_string(dataset[1]))
             for dataset in weight_config])
def _create_weight_configs(self):
    """Extract different weight configurations from `self._config`.

    Combines the per-dataset weights in `self._weight_dict` with every
    value combination of the common weights in `self._common_weights`
    and appends the resolved (dataset, weight-string) lists to
    `self._weight_configs`.
    """
    # one candidate list of (dataset name, weight expression) per dataset
    weights_list = [[(name, weight) for weight in self._weight_dict[name]]
                    for name in self._weight_dict]
    # one candidate list of (common-weight name, value) per common weight
    common_weights_list = [[(name, weight) for weight in self._common_weights[name]]
                           for name in self._common_weights]
    # every combination of common-weight values, as name -> value dicts
    common_weights_dicts = []
    for item in itertools.product(*common_weights_list):
        d = {}
        for name,weight in item:
            d[name] = weight
        common_weights_dicts.append(d)

    weight_configs = []
    for weight_conf in itertools.product(*weights_list):
        number_of_configs = len(weight_configs)
        for common_weight in common_weights_dicts:
            # substitute each common-weight name by its chosen value in
            # every dataset's weight expression (fold over the names)
            replaced_config \
                = tuple([(dataset[0],
                          reduce(lambda x,y: mps_tools.replace_factors(x, y, common_weight[y]),
                                 common_weight, dataset[1]))
                         for dataset in weight_conf])
            if replaced_config not in weight_configs:
                weight_configs.append(replaced_config)

        # default if config contains no common weights:
        if len(weight_configs) == number_of_configs:
            weight_configs.append(weight_conf)

    # evaluate the product strings and store the final configurations
    for weight_config in weight_configs:
        resolved_weight_config \
            = [(dataset[0], mps_tools.compute_product_string(dataset[1]))
               for dataset in weight_config]
        self._weight_configs.append(resolved_weight_config)
def _fetch_datasets(self):
    """Fetch internal and external dataset configurations.

    Populates `self._datasets`, `self._weight_dict` and
    `self._common_weights` from the main config (`self._config`) and from
    every config in `self._external_datasets`, then sets
    `self._global_tag`.  Exits the program (`sys.exit(1)`) on missing
    mandatory options or an unreadable input file list.
    """

    # Process the main config first, then the external ones; the
    # OrderedDict keeps that precedence stable.
    all_configs = collections.OrderedDict()
    all_configs["main"] = {"config": self._config,
                           "general": self._general_options,
                           "weight": None}
    all_configs.update(self._external_datasets)

    for config in six.itervalues(all_configs):
        # Resolve this config's global weight: a plain factor, or the name
        # of a common weight (possibly qualified with the main config's
        # path to resolve name clashes).  Result is a tuple of candidates.
        global_weight = "1" if config["weight"] is None else config["weight"]
        if global_weight+self._config.config_path in self._common_weights:
            global_weight = self._common_weights[global_weight+
                                                self._config.config_path]
        elif global_weight in self._common_weights:
            global_weight = self._common_weights[global_weight]
        else:
            global_weight = (global_weight,)
        common_weights = {}  # common weights defined in THIS config
        weight_dict = {}     # dataset weights defined in THIS config
        for section in config["config"].sections():
            # NOTE(review): cached inside the section loop, so if an
            # earlier section already changed ${datasetdir}, later
            # iterations cache the modified value and the restore below
            # does not recover the original environment — confirm intended.
            cache_datasetdir = os.environ["datasetdir"]
            # NOTE(review): substring match — any section whose name
            # contains "general" is treated as the [general] section.
            if "general" in section:
                if config["config"].has_option("general", "datasetdir"):
                    os.environ["datasetdir"] = config["config"].get("general", "datasetdir")
            elif section == "weights":
                # comma-separated list of alternatives per common weight
                for option in config["config"].options(section):
                    common_weights[option] \
                        = [x.strip() for x in
                           config["config"].get(section, option).split(",")]
            elif section.startswith("dataset:"):
                print("-"*75)
                # set name from section-name
                name = section[8:]
                if name in self._datasets:
                    print("WARNING: Duplicate definition of dataset '{}'".format(name))
                    print(" -> Using defintion in '{}':\n".format(config["config"].config_path))
                    print(" [{}]".format(section))
                    for k,v in config["config"].items(section):
                        print(" ", k, "=", v)
                    print()
                self._datasets[name] = {}

                # extract weight for the dataset (default "1.0"), combined
                # with every global-weight alternative
                if config["config"].has_option(section, "weight"):
                    self._weight_dict[name] \
                        = [x.strip() for x in
                           config["config"].get(section, "weight").split(",")]
                else:
                    self._weight_dict[name] = ["1.0"]
                self._weight_dict[name] = [global_w+"*"+w
                                           for w in self._weight_dict[name]
                                           for global_w in global_weight]
                weight_dict[name] = self._weight_dict[name]

                # extract essential variables
                for var in ("inputFileList", "collection"):
                    try:
                        self._datasets[name][var] = config["config"].get(section, var)
                    except ConfigParser.NoOptionError:
                        print("No", var, "found in", section+". Please check ini-file.")
                        sys.exit(1)

                # get globaltag and configTemplate. If none in section, try to get
                # default from [general] section.
                for var in ("configTemplate", "globaltag"):
                    try:
                        self._datasets[name][var] = config["config"].get(section, var)
                    except (ConfigParser.NoSectionError,ConfigParser.NoOptionError):
                        try:
                            self._datasets[name][var] = config["general"][var]
                        except KeyError:
                            try:
                                self._datasets[name][var] \
                                    = all_configs["main"]["general"][var]
                            except KeyError:
                                print("No",var,"found in ["+section+"]", end=' ')
                                print("and no default in [general] section.")
                                sys.exit(1)

                # extract non-essential options; the cosmics flags are
                # mandatory only for cosmics collections
                if "ALCARECOTkAlCosmics" in self._datasets[name]["collection"]:
                    try:
                        self._datasets[name]["cosmicsZeroTesla"] \
                            = config["config"].getboolean(section,"cosmicsZeroTesla")
                    except ConfigParser.NoOptionError:
                        print("No option cosmicsZeroTesla found in", section,
                              "even though it is required for dataset type",
                              self._datasets[name]["collection"], ". Please check ini-file.")
                        sys.exit(1)
                    try:
                        self._datasets[name]["cosmicsDecoMode"] \
                            = config["config"].getboolean(section,"cosmicsDecoMode")
                    except ConfigParser.NoOptionError:
                        print("No option cosmicsDecoMode found in", section,
                              "even though it is required for dataset type",
                              self._datasets[name]["collection"], ".Please check ini-file.")
                        sys.exit(1)

                # optional numeric options with sentinel defaults
                self._datasets[name]["primaryWidth"] = -1.0
                if config["config"].has_option(section,"primaryWidth"):
                    self._datasets[name]["primaryWidth"] \
                        = config["config"].getfloat(section,"primaryWidth")

                self._datasets[name]["numberOfEvents"] = -1
                if config["config"].has_option(section, "numberOfEvents"):
                    self._datasets[name]["numberOfEvents"] \
                        = config["config"].getint(section, "numberOfEvents")

                # json file lookup chain: [dataset:...] option -> this
                # config's [general] -> main config's [general] -> none
                self._datasets[name]["json"] = ""
                try:
                    self._datasets[name]["json"] = config["config"].get(section,"json")
                except ConfigParser.NoOptionError:
                    try:
                        self._datasets[name]["json"] = config["general"]["json"]
                    except KeyError:
                        try:
                            self._datasets[name]["json"] \
                                = all_configs["main"]["general"]["json"]
                        except KeyError:
                            print("No json given in either [general] or", end=' ')
                            print("["+section+"] sections.")
                            print(" -> Proceeding without json-file.")

                #replace ${datasetdir} and other variables, e.g. $CMSSW_BASE
                for var in ("inputFileList", "json", "configTemplate"):
                    self._datasets[name][var] \
                        = os.path.expandvars(self._datasets[name][var])

                # Get number of jobs from lines in inputfilelist
                self._datasets[name]["njobs"] = 0
                try:
                    with open(self._datasets[name]["inputFileList"], "r") as filelist:
                        for line in filelist:
                            # skip CastorPool header lines
                            if "CastorPool" in line:
                                continue
                            # ignore empty lines
                            if not line.strip()=="":
                                self._datasets[name]["njobs"] += 1
                except IOError:
                    print("Inputfilelist", self._datasets[name]["inputFileList"], end=' ')
                    print("does not exist.")
                    sys.exit(1)
                if self._datasets[name]["njobs"] == 0:
                    print("Number of jobs is 0. There may be a problem with the inputfilelist:")
                    print(self._datasets[name]["inputFileList"])
                    sys.exit(1)

                # Check if njobs gets overwritten in .ini-file
                # (only allowed to shrink, never to exceed the file count)
                if config["config"].has_option(section, "njobs"):
                    if config["config"].getint(section, "njobs") <= self._datasets[name]["njobs"]:
                        self._datasets[name]["njobs"] = config["config"].getint(section, "njobs")
                    else:
                        print("'njobs' is bigger than the number of files for this", end=' ')
                        print("dataset:", self._datasets[name]["njobs"])
                        print("Using default.")
                else:
                    print("No number of jobs specified. Using number of files in", end=' ')
                    print("inputfilelist as the number of jobs.")

        # check if local weights override global weights and resolve name clashes
        for weight_name, weight_values in six.iteritems(common_weights):
            for key, weight in six.iteritems(weight_dict):
                if any([weight_name in w for w in weight]):
                    # clash: qualify the local weight with the config path
                    self._common_weights[weight_name+config["config"].config_path] = weight_values
                    self._weight_dict[key] = [mps_tools.replace_factors(
                        w, weight_name,
                        weight_name+config["config"].config_path)
                                              for w in weight]
                else:
                    self._common_weights[weight_name] = weight_values
                    self._weight_dict[key] = weight

        # restore the environment for the next config
        os.environ["datasetdir"] = cache_datasetdir

    if len(self._datasets) == 0:
        # NOTE(review): on Python 3 the join below raises TypeError
        # (list + dict_keys view); likely needs
        # list(self._external_datasets) — confirm targeted Python version.
        print("No dataset section defined in '{0}'".format(
            ", ".join([self._args.aligmentConfig]+self._external_datasets.keys())))
        print("At least one section '[dataset:<name>]' is required.")
        sys.exit(1)

    # `name` is the last dataset section parsed — presumably all datasets
    # share one globaltag; the last one seen wins. TODO confirm.
    self._global_tag = self._datasets[name]["globaltag"]
def _fetch_datasets(self):
    """Fetch internal and external dataset configurations.

    (Python 2 variant: print statements, dict.itervalues/iteritems.)
    Populates `self._datasets`, `self._weight_dict` and
    `self._common_weights` from the main config (`self._config`) and from
    every config in `self._external_datasets`, then sets
    `self._global_tag`.  Exits the program (`sys.exit(1)`) on missing
    mandatory options or an unreadable input file list.
    """

    # Process the main config first, then the external ones; the
    # OrderedDict keeps that precedence stable.
    all_configs = collections.OrderedDict()
    all_configs["main"] = {"config": self._config,
                           "general": self._general_options,
                           "weight": None}
    all_configs.update(self._external_datasets)

    for config in all_configs.itervalues():
        # Resolve this config's global weight: a plain factor, or the name
        # of a common weight (possibly qualified with the main config's
        # path to resolve name clashes).  Result is a tuple of candidates.
        global_weight = "1" if config["weight"] is None else config["weight"]
        if global_weight+self._config.config_path in self._common_weights:
            global_weight = self._common_weights[global_weight+
                                                self._config.config_path]
        elif global_weight in self._common_weights:
            global_weight = self._common_weights[global_weight]
        else:
            global_weight = (global_weight,)
        common_weights = {}  # common weights defined in THIS config
        weight_dict = {}     # dataset weights defined in THIS config
        for section in config["config"].sections():
            # NOTE(review): cached inside the section loop, so if an
            # earlier section already changed ${datasetdir}, later
            # iterations cache the modified value and the restore below
            # does not recover the original environment — confirm intended.
            cache_datasetdir = os.environ["datasetdir"]
            # NOTE(review): substring match — any section whose name
            # contains "general" is treated as the [general] section.
            if "general" in section:
                if config["config"].has_option("general", "datasetdir"):
                    os.environ["datasetdir"] = config["config"].get("general", "datasetdir")
            elif section == "weights":
                # comma-separated list of alternatives per common weight
                for option in config["config"].options(section):
                    common_weights[option] \
                        = [x.strip() for x in
                           config["config"].get(section, option).split(",")]
            elif section.startswith("dataset:"):
                print "-"*75
                # set name from section-name
                name = section[8:]
                if name in self._datasets:
                    print "WARNING: Duplicate definition of dataset '{}'".format(name)
                    print " -> Using defintion in '{}':\n".format(config["config"].config_path)
                    print " [{}]".format(section)
                    for k,v in config["config"].items(section):
                        print " ", k, "=", v
                    print
                self._datasets[name] = {}

                # extract weight for the dataset (default "1.0"), combined
                # with every global-weight alternative
                if config["config"].has_option(section, "weight"):
                    self._weight_dict[name] \
                        = [x.strip() for x in
                           config["config"].get(section, "weight").split(",")]
                else:
                    self._weight_dict[name] = ["1.0"]
                self._weight_dict[name] = [global_w+"*"+w
                                           for w in self._weight_dict[name]
                                           for global_w in global_weight]
                weight_dict[name] = self._weight_dict[name]

                # extract essential variables
                for var in ("inputFileList", "collection"):
                    try:
                        self._datasets[name][var] = config["config"].get(section, var)
                    except ConfigParser.NoOptionError:
                        print "No", var, "found in", section+". Please check ini-file."
                        sys.exit(1)

                # get globaltag and configTemplate. If none in section, try to get
                # default from [general] section.
                for var in ("configTemplate", "globaltag"):
                    try:
                        self._datasets[name][var] = config["config"].get(section, var)
                    except (ConfigParser.NoSectionError,ConfigParser.NoOptionError):
                        try:
                            self._datasets[name][var] = config["general"][var]
                        except KeyError:
                            try:
                                self._datasets[name][var] \
                                    = all_configs["main"]["general"][var]
                            except KeyError:
                                print "No",var,"found in ["+section+"]",
                                print "and no default in [general] section."
                                sys.exit(1)

                # extract non-essential options
                # (cosmics flags are optional here and default to False)
                self._datasets[name]["cosmicsZeroTesla"] = False
                if config["config"].has_option(section,"cosmicsZeroTesla"):
                    self._datasets[name]["cosmicsZeroTesla"] \
                        = config["config"].getboolean(section,"cosmicsZeroTesla")

                self._datasets[name]["cosmicsDecoMode"] = False
                if config["config"].has_option(section,"cosmicsDecoMode"):
                    self._datasets[name]["cosmicsDecoMode"] \
                        = config["config"].getboolean(section,"cosmicsDecoMode")

                # optional numeric options with sentinel defaults
                self._datasets[name]["primaryWidth"] = -1.0
                if config["config"].has_option(section,"primaryWidth"):
                    self._datasets[name]["primaryWidth"] \
                        = config["config"].getfloat(section,"primaryWidth")

                self._datasets[name]["numberOfEvents"] = -1
                if config["config"].has_option(section, "numberOfEvents"):
                    self._datasets[name]["numberOfEvents"] \
                        = config["config"].getint(section, "numberOfEvents")

                # json file lookup chain: [dataset:...] option -> this
                # config's [general] -> main config's [general] -> none
                self._datasets[name]["json"] = ""
                try:
                    self._datasets[name]["json"] = config["config"].get(section,"json")
                except ConfigParser.NoOptionError:
                    try:
                        self._datasets[name]["json"] = config["general"]["json"]
                    except KeyError:
                        try:
                            self._datasets[name]["json"] \
                                = all_configs["main"]["general"]["json"]
                        except KeyError:
                            print "No json given in either [general] or",
                            print "["+section+"] sections."
                            print " -> Proceeding without json-file."

                #replace ${datasetdir} and other variables, e.g. $CMSSW_BASE
                for var in ("inputFileList", "json", "configTemplate"):
                    self._datasets[name][var] \
                        = os.path.expandvars(self._datasets[name][var])

                # Get number of jobs from lines in inputfilelist
                self._datasets[name]["njobs"] = 0
                try:
                    with open(self._datasets[name]["inputFileList"], "r") as filelist:
                        for line in filelist:
                            # skip CastorPool header lines
                            if "CastorPool" in line:
                                continue
                            # ignore empty lines
                            if not line.strip()=="":
                                self._datasets[name]["njobs"] += 1
                except IOError:
                    print "Inputfilelist", self._datasets[name]["inputFileList"],
                    print "does not exist."
                    sys.exit(1)
                if self._datasets[name]["njobs"] == 0:
                    print "Number of jobs is 0. There may be a problem with the inputfilelist:"
                    print self._datasets[name]["inputFileList"]
                    sys.exit(1)

                # Check if njobs gets overwritten in .ini-file
                # (only allowed to shrink, never to exceed the file count)
                if config["config"].has_option(section, "njobs"):
                    if config["config"].getint(section, "njobs") <= self._datasets[name]["njobs"]:
                        self._datasets[name]["njobs"] = config["config"].getint(section, "njobs")
                    else:
                        print "'njobs' is bigger than the number of files for this",
                        print "dataset:", self._datasets[name]["njobs"]
                        print "Using default."
                else:
                    print "No number of jobs specified. Using number of files in",
                    print "inputfilelist as the number of jobs."

        # check if local weights override global weights and resolve name clashes
        for weight_name, weight_values in common_weights.iteritems():
            for key, weight in weight_dict.iteritems():
                if any([weight_name in w for w in weight]):
                    # clash: qualify the local weight with the config path
                    self._common_weights[weight_name+config["config"].config_path] = weight_values
                    self._weight_dict[key] = [mps_tools.replace_factors(
                        w, weight_name,
                        weight_name+config["config"].config_path)
                                              for w in weight]
                else:
                    self._common_weights[weight_name] = weight_values
                    self._weight_dict[key] = weight

        # restore the environment for the next config
        os.environ["datasetdir"] = cache_datasetdir

    if len(self._datasets) == 0:
        print "No dataset section defined in '{0}'".format(
            ", ".join([self._args.aligmentConfig]+self._external_datasets.keys()))
        print "At least one section '[dataset:<name>]' is required."
        sys.exit(1)

    # `name` is the last dataset section parsed — presumably all datasets
    # share one globaltag; the last one seen wins. TODO confirm.
    self._global_tag = self._datasets[name]["globaltag"]