def filter_idc_data(data_a, data_c, z_range):
    """
    Select the A-side and/or C-side data based on the z range.

    :param list data_a: list of arrays of values from the A-side
    :param list data_c: list of arrays of values from the C-side
    :param list z_range: a list of [min_z, max_z] values. If the interval contains positive z,
        A-side data will be used. Similarly, for any negative z, C-side data is used.
    :return: tuple with the selected data. If both A- and C-side are selected,
        the corresponding arrays are stacked.
    :rtype: tuple
    """
    # TODO: Getter and application of Fourier coefficients need to be modified to handle both A and
    # C side at the same time
    if z_range[0] < 0 and z_range[1] > 0:  # pylint: disable=chained-comparison
        logger = get_logger()
        logger.fatal("Framework not yet fully prepared to use data from both A and C side at once.")

    output_data = [[] for _ in data_a]
    if z_range[1] > 0:
        for ind, data in enumerate(data_a):
            output_data[ind] = np.hstack((output_data[ind],
                                          data / (scipy.constants.e * NELE_PER_ADC)))  # C -> ADC
    if z_range[0] < 0:
        for ind, data in enumerate(data_c):
            output_data[ind] = np.hstack((output_data[ind],
                                          data / (scipy.constants.e * NELE_PER_ADC)))  # C -> ADC
    return tuple(output_data)
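# A minimal usage sketch for filter_idc_data (not from the original sources): it shows
# how the z interval selects the detector side. The arrays and z values below are
# illustrative assumptions only.
def _example_filter_idc_data():
    data_a = [np.array([1.0e-12, 2.0e-12])]  # hypothetical A-side charges in C
    data_c = [np.array([3.0e-12, 4.0e-12])]  # hypothetical C-side charges in C
    (a_only,) = filter_idc_data(data_a, data_c, [0.0, 250.0])   # only positive z -> A-side
    (c_only,) = filter_idc_data(data_a, data_c, [-250.0, 0.0])  # only negative z -> C-side
    # a z range spanning both signs, e.g. [-250.0, 250.0], currently triggers logger.fatal
    return a_only, c_only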
def load_train_apply(input_data, event_index, input_z_range, output_z_range,
                     grid_r, grid_rphi, grid_z, opt_train, opt_pred):
    """
    Load inputs and expected outputs for training / apply for one event.
    See the deprecated variant below for the detailed parameter description;
    here the input and output z intervals can be set separately.
    """
    [vec_mean_sc, vec_fluctuation_sc, vec_fluctuation_dist_r, vec_fluctuation_dist_rphi,
     vec_fluctuation_dist_z] = \
        load_data(input_data, event_index, input_z_range, output_z_range)
    dim_input = sum(opt_train)
    dim_output = sum(opt_pred)
    inputs = np.empty((grid_rphi, grid_r, grid_z, dim_input))
    exp_outputs = np.empty((grid_rphi, grid_r, grid_z, dim_output))

    indexfillx = 0
    if opt_train[0] == 1:
        inputs[:, :, :, indexfillx] = \
            vec_mean_sc.reshape(grid_rphi, grid_r, grid_z)
        indexfillx = indexfillx + 1
    if opt_train[1] == 1:
        inputs[:, :, :, indexfillx] = \
            vec_fluctuation_sc.reshape(grid_rphi, grid_r, grid_z)

    if dim_output > 1:
        logger = get_logger()
        logger.fatal("YOU CAN PREDICT ONLY 1 DISTORTION. The sum of opt_predout must be 1.")

    flucs = np.array((vec_fluctuation_dist_r, vec_fluctuation_dist_rphi, vec_fluctuation_dist_z))
    sel_flucs = flucs[np.array(opt_pred) == 1]
    for ind, vec_fluctuation_dist in enumerate(sel_flucs):
        exp_outputs[:, :, :, ind] = \
            vec_fluctuation_dist.reshape(grid_rphi, grid_r, grid_z)

    return inputs, exp_outputs
def __init__(self):
    """
    Initialize the validator.
    """
    logger = get_logger()
    logger.info("IDCDataValidator::Init")
    self.model = None
    self.config = None
def log_memory_usage(objects):
    """
    Write the memory sizes of the given objects to the console.

    :param list objects: list of (obj, str) tuples with objects and logging comments
    """
    logger = get_logger()
    for obj, comment in objects:
        size, mult = format_memory(get_memory_usage(obj))
        logger.info("%s memory usage: %d %sB", comment, size, mult)
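# Hedged usage sketch (illustrative only): log_memory_usage takes (object, comment)
# pairs; get_memory_usage and format_memory are the module helpers used above.
def _example_log_memory_usage():
    inputs = np.zeros((90, 17, 17))   # hypothetical input grid
    outputs = np.zeros((90, 17, 17))  # hypothetical output grid
    log_memory_usage(((inputs, "Input train data"), (outputs, "Output train data")))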
def load_train_apply(dirinput, event_index, z_range,
                     grid_r, grid_rphi, grid_z, opt_train, opt_pred):
    """
    Load inputs and outputs for training / apply for one event.
    NOTE: Function for the old data, will be deprecated.

    :param str dirinput: the directory with the input data, value taken from the config file
    :param list event_index: a list of [random_index, mean_map_index] indices of the random
        and the mean map, respectively.
    :param list z_range: a list of [min_z, max_z] values, the input and output data is taken
        from this interval
    :param int grid_r: grid granularity (number of voxels) along r-axis
    :param int grid_rphi: grid granularity (number of voxels) along rphi-axis
    :param int grid_z: grid granularity (number of voxels) along z-axis
    :param list opt_train: list of 2 binary values corresponding to activating the train input
        of average space charge and space-charge fluctuations, respectively,
        taken from the config file
    :param list opt_pred: list of 3 binary values corresponding to activating the prediction
        of r, rphi and z distortion corrections, taken from the config file
    :return: tuple of inputs and expected outputs
    :rtype: tuple
    """
    [vec_mean_sc, vec_fluctuation_sc, vec_fluctuation_dist_r, vec_fluctuation_dist_rphi,
     vec_fluctuation_dist_z] = \
        load_data(dirinput, event_index, z_range)
    dim_input = sum(opt_train)
    dim_output = sum(opt_pred)
    inputs = np.empty((grid_rphi, grid_r, grid_z, dim_input))
    exp_outputs = np.empty((grid_rphi, grid_r, grid_z, dim_output))

    indexfillx = 0
    if opt_train[0] == 1:
        inputs[:, :, :, indexfillx] = \
            vec_mean_sc.reshape(grid_rphi, grid_r, grid_z)
        indexfillx = indexfillx + 1
    if opt_train[1] == 1:
        inputs[:, :, :, indexfillx] = \
            vec_fluctuation_sc.reshape(grid_rphi, grid_r, grid_z)

    if dim_output > 1:
        logger = get_logger()
        logger.fatal("YOU CAN PREDICT ONLY 1 DISTORTION. The sum of opt_predout must be 1.")

    flucs = np.array((vec_fluctuation_dist_r, vec_fluctuation_dist_rphi, vec_fluctuation_dist_z))
    sel_flucs = flucs[np.array(opt_pred) == 1]
    for ind, vec_fluctuation_dist in enumerate(sel_flucs):
        exp_outputs[:, :, :, ind] = \
            vec_fluctuation_dist.reshape(grid_rphi, grid_r, grid_z)

    return inputs, exp_outputs
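# Hedged usage sketch for the deprecated load_train_apply (not from the original
# sources): the directory, event indices and grid sizes below are assumptions.
def _example_load_train_apply():
    inputs, exp_outputs = load_train_apply(
        "/data/SC-17-17-90",  # hypothetical input directory
        [3, 0],               # [random_index, mean_map_index]
        [0.0, 251.0],         # z interval
        17, 90, 17,           # grid_r, grid_rphi, grid_z
        [1, 1],               # use both SC mean and SC fluctuations as input
        [1, 0, 0])            # predict only the r distortion correction
    # with these options: inputs.shape == (90, 17, 17, 2),
    #                     exp_outputs.shape == (90, 17, 17, 1)
    return inputs, exp_outputs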
def log_time(start, end, comment):
    """
    Write the elapsed time to the console.

    :param double start: start time
    :param double end: end time
    :param str comment: string attached to the console output
    """
    logger = get_logger()
    elapsed_time = end - start
    time_min = int(elapsed_time // 60)
    time_sec = int(elapsed_time % 60)
    logger.info("Elapsed time %s: %dm %ds", comment, time_min, time_sec)
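# Hedged usage sketch: log_time pairs with timeit.default_timer, which the training
# code above imports as "timer"; the timed workload here is only a placeholder.
def _example_log_time():
    from timeit import default_timer as timer
    start = timer()
    sum(range(10**6))  # placeholder workload
    end = timer()
    log_time(start, end, "for the placeholder workload")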
def setup_tf():
    """
    Configure TensorFlow; optionally limit the GPU memory usage.
    """
    if os.environ.get('TPCwithDNNSETMEMLIMIT'):
        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            try:
                tf.config.experimental.set_virtual_device_configuration(
                    gpus[0],
                    [tf.config.experimental.VirtualDeviceConfiguration(
                        memory_limit=int(os.environ.get('TPCwithDNNSETMEMLIMIT')))])
                # for gpu in gpus:
                #     tf.config.experimental.set_memory_growth(gpu, True)
            except RuntimeError as e:
                logger = get_logger()
                logger.error(e)
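# Hedged usage sketch: the limit is read from the TPCwithDNNSETMEMLIMIT environment
# variable; per the TensorFlow API, VirtualDeviceConfiguration's memory_limit is given
# in MB. The value below is an illustrative assumption.
def _example_setup_tf():
    os.environ['TPCwithDNNSETMEMLIMIT'] = '4096'  # cap GPU 0 at ~4 GB
    setup_tf()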
def log_total_memory_usage(comment=None):
    """
    Write the overall memory usage of the program to the console.

    :param str comment: additional comment for logging
    """
    logger = get_logger()
    if comment is not None:
        logger.info(comment)
    size, mult = format_memory(psutil.virtual_memory().available)
    logger.info("Free RAM: %d %sB", size, mult)
    # note: ru_maxrss is reported in kB on Linux (in bytes on macOS)
    size, mult = format_memory(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
    logger.info("RAM used by application: %d %sB", size, mult)
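# Hedged usage sketch, mirroring the call sites in the training code above:
#   log_total_memory_usage("Memory usage after loading data")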
def load_event_idc(dirinput, event_index, input_z_range, output_z_range, opt_pred):
    """
    Load the IDC inputs and expected outputs for one event.
    """
    inputs, exp_outputs = load_data_one_idc(dirinput, event_index,
                                            input_z_range, output_z_range, opt_pred)

    dim_output = sum(opt_pred)
    if dim_output > 1:
        logger = get_logger()
        logger.fatal("YOU CAN PREDICT ONLY 1 DISTORTION. The sum of opt_predout must be 1.")

    return inputs, exp_outputs
def main():
    """ The global main function """
    logger = get_logger()
    logger.info("Starting TPC ML...")

    if len(sys.argv) == 2:
        default_file_name = sys.argv[1]
        print("Using user specified steering options file: %s" % default_file_name)
    else:
        default_file_name = "default.yml"
    with open(default_file_name, 'r') as default_data:
        default = yaml.safe_load(default_data)

    case = default["case"]
    with open("database_parameters_%s.yml" % case, 'r') as parameters_data:
        db_parameters = yaml.safe_load(parameters_data)
def mat_to_vec(opt_pred, mat_tuple):
    """
    Convert multidimensional arrays to flat vectors.

    :param list opt_pred: list of 3 binary values corresponding to activation of r, rphi and z
        distortion corrections, taken from the config file
    :param tuple mat_tuple: tuple of arrays to be flattened
    :return: tuple of flattened input arrays
    :rtype: tuple
    """
    if sum(opt_pred) > 1:
        logger = get_logger()
        logger.fatal("Framework not yet fully prepared for more than one distortion direction.")
    sel_opts = np.array(opt_pred) > 0
    res = tuple(np.hstack(mat[sel_opts]) for mat in mat_tuple)
    return res
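# Hedged usage sketch for mat_to_vec (not from the original sources): each matrix in
# mat_tuple is expected to stack one 1-D vector per distortion direction, so that the
# opt_pred mask can select rows before flattening. Shapes are illustrative.
def _example_mat_to_vec():
    vec_r = np.zeros(10)
    vec_rphi = np.ones(10)
    vec_z = np.full(10, 2.0)
    mat = np.array((vec_r, vec_rphi, vec_z))  # shape (3, 10), one row per direction
    (flat,) = mat_to_vec([0, 1, 0], (mat,))   # select the rphi row only
    # flat equals vec_rphi with shape (10,)
    return flat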
def train(self):
    """
    Train the optimizer.
    """
    self.config.logger.info("XGBoostOptimiser::train")
    if self.config.dim_output > 1:
        logger = get_logger()
        logger.fatal("YOU CAN PREDICT ONLY 1 DISTORTION. dim_output is bigger than 1.")
    model = XGBRFRegressor(verbosity=1, **(self.config.params))

    start = timer()
    inputs, exp_outputs, *_ = self.__get_data("train")
    end = timer()
    log_time(start, end, "for loading training data")
    log_memory_usage(((inputs, "Input train data"), (exp_outputs, "Output train data")))
    log_total_memory_usage("Memory usage after loading data")

    if self.config.plot_train:
        inputs_val, outputs_val, *_ = self.__get_data("validation")
        log_memory_usage(((inputs_val, "Input validation data"),
                          (outputs_val, "Output validation data")))
        log_total_memory_usage("Memory usage after loading validation data")
        self.__plot_train(model, inputs, exp_outputs, inputs_val, outputs_val)

    start = timer()
    model.fit(inputs, exp_outputs)
    end = timer()
    log_time(start, end, "actual train")

    model.get_booster().feature_names = get_input_names_oned_idc(
        self.config.opt_usederivative, self.config.num_fourier_coeffs_train)
    self.__plot_feature_importance(model)
    self.save_model(model)
def main():
    """ The global main function """
    logger = get_logger()
    logger.info("Starting TPC ML...")
    log_total_memory_usage("Initial memory usage")

    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-c", "--config", dest="config_file",
                        default="config_model_parameters.yml", type=str,
                        help="path to the *.yml configuration file")
    parser.add_argument("-s", "--steer", dest="steer_file", default="default.yml",
                        type=str, help="path to the *.yml steering file")
    # parameters for the steer file
    parser.add_argument("--dotrain", action='store_true', default=argparse.SUPPRESS,
                        help="Perform the training")
    parser.add_argument("--docreateinputdata", action='store_true', default=argparse.SUPPRESS,
                        help="Create input data trees")
    parser.add_argument("--docreatevaldata", action='store_true', default=argparse.SUPPRESS,
                        help="Create validation data trees")
    # parameters for the config file
    parser.add_argument("--rndaugment", action='store_true', default=argparse.SUPPRESS,
                        help="Use random-random augmentation for training")
    parser.add_argument("--ntrain1d", dest='train_events_oned', type=int,
                        default=argparse.SUPPRESS,
                        help="Set custom number of training events")
    parser.add_argument("--nval", dest='val_events', type=int, default=argparse.SUPPRESS,
                        help="Set custom number of validation events")
    parser.add_argument("--frac", dest='downsample_fraction', type=float,
                        default=argparse.SUPPRESS,
                        help="Set downsampling fraction if --downsample is set")
    parser.add_argument("--nestimators", dest='n_estimators', type=int,
                        default=argparse.SUPPRESS,
                        help="Set the number of trees for xgboost models")
    parser.add_argument("--maxdepth", dest='max_depth', type=int, default=argparse.SUPPRESS,
                        help="Set maximum depth of trees for xgboost models")
    args = parser.parse_args()
    logger.info("Using configuration: %s steer file: %s", args.config_file, args.steer_file)

    with open(args.steer_file, "r") as steer_data:
        default = yaml.safe_load(steer_data)
    with open(args.config_file, "r") as config_data:
        config_parameters = yaml.safe_load(config_data)

    logger.info("Arguments provided: %s", str(args))
    if "dotrain" in args:
        default['dotrain'] = True
    if "docreateinputdata" in args or "docreatevaldata" in args:
        default['docreatevaldata'] = True
        config_parameters['common']['nd_validate_model'] = False
        if "docreatevaldata" in args:
            config_parameters['common']['nd_validate_model'] = True
    if "rndaugment" in args:
        config_parameters['common']['rnd_augment'] = True
    if "train_events_oned" in args:
        config_parameters['xgboost']['train_events'] = [args.train_events_oned]
    if "val_events" in args:
        config_parameters['common']['val_events'] = args.val_events
    if "downsample_fraction" in args:
        config_parameters['xgboost']['downsample'] = True
        config_parameters['xgboost']['downsample_fraction'] = args.downsample_fraction
    if "n_estimators" in args:
        config_parameters['xgboost']['params']['n_estimators'] = args.n_estimators
    if "max_depth" in args:
        config_parameters['xgboost']['params']['max_depth'] = args.max_depth

    models, corr, dataval = init_models(config_parameters)
    events_counts = (get_events_counts(config_parameters[model.name]["train_events"],
                                       config_parameters[model.name]["validation_events"],
                                       config_parameters[model.name]["apply_events"])
                     for model in models)

    ranges_rnd = config_parameters["common"]["range_rnd_index_train"]
    ranges_mean = config_parameters["common"]["range_mean_index"]
    if config_parameters["common"]["rnd_augment"]:
        max_available_events = (ranges_rnd[1] + 1 - ranges_rnd[0]) * \
                               (ranges_rnd[1] - ranges_rnd[0])
    else:
        max_available_events = (ranges_rnd[1] + 1 - ranges_rnd[0]) * \
                               (ranges_mean[1] + 1 - ranges_mean[0])

    for model, model_events_counts in zip(models, events_counts):
        all_events_counts = []
        for (train_events, val_events, apply_events) in model_events_counts:
            total_events = train_events + val_events + apply_events
            if total_events > max_available_events:
                logger.warning("Too many events requested: %d, available: %d",
                               total_events, max_available_events)
                continue
            all_events_counts.append((train_events, val_events, apply_events, total_events))
            ranges = {"train": [0, train_events],
                      "val": [train_events, train_events + val_events],
                      "apply": [train_events + val_events, total_events]}
            model.config.set_ranges(ranges, total_events, train_events, val_events, apply_events)

            run_model_and_val(model, dataval, default, config_parameters["common"])
            # TODO: apply the correction and save in files
            if corr is not None:
                pass

        if default["doprofile"] is True:
            model.draw_profile(all_events_counts)

    logger.info("Program finished.")
def main():
    """ The global main function """
    logger = get_logger()
    logger.info("Starting TPC ML...")
    log_total_memory_usage("Initial memory usage")

    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-c", "--config", dest="config_file",
                        default="config_model_parameters.yml", type=str,
                        help="path to the *.yml configuration file")
    parser.add_argument("-s", "--steer", dest="steer_file", default="default.yml",
                        type=str, help="path to the *.yml steering file")
    # parameters for the steer file
    parser.add_argument("--dotrain", action="store_true", default=argparse.SUPPRESS,
                        help="Perform the training")
    parser.add_argument("--docreatendvaldata", action="store_true", default=argparse.SUPPRESS,
                        help="Create validation data trees")
    parser.add_argument("--docache", action="store_true", default=argparse.SUPPRESS,
                        help="Cache training data if not already existing")
    # parameters for the config file
    parser.add_argument("--rndaugment", action="store_true", default=argparse.SUPPRESS,
                        help="Use random-random augmentation for training")
    parser.add_argument("--ntrain1d", dest="train_events_oned", type=int,
                        default=argparse.SUPPRESS,
                        help="Set custom number of training events")
    parser.add_argument("--nval", dest="nd_val_events", type=int, default=argparse.SUPPRESS,
                        help="Set custom number of max nd validation events")
    parser.add_argument("--dnpoints", dest="downsample_npoints", type=int,
                        default=argparse.SUPPRESS,
                        help="Set number of downsampling points")
    parser.add_argument("--nestimators", dest="n_estimators", type=int,
                        default=argparse.SUPPRESS,
                        help="Set the number of trees for xgboost models")
    parser.add_argument("--maxdepth", dest="max_depth", type=int, default=argparse.SUPPRESS,
                        help="Set maximum depth of trees for xgboost models")
    parser.add_argument("--nfftidcs", dest="num_fft_idcs", type=int, default=argparse.SUPPRESS,
                        help="Set number of 1D IDCs used for the FFT."
                             " Corresponds to the ion drift time (ms) used in simulation.")
    parser.add_argument("--nfouriertrain", dest="num_fourier_coeffs_train", type=int,
                        default=argparse.SUPPRESS,
                        help="Set number of Fourier coefficients"
                             " to take from the 1D IDC train input")
    parser.add_argument("--nfourierapply", dest="num_fourier_coeffs_apply", type=int,
                        default=argparse.SUPPRESS,
                        help="Set number of Fourier coefficients"
                             " to take from the 1D IDC apply input")
    # parameters for caching
    parser.add_argument("--cache-events", dest="cache_events", type=int,
                        default=argparse.SUPPRESS,
                        help="Set the number of events to cache")
    parser.add_argument("--cache-train", action="store_true", default=argparse.SUPPRESS,
                        help="Use cached data for training")
    parser.add_argument("--cache-file-size", dest="cache_file_size", type=int,
                        default=argparse.SUPPRESS,
                        help="Set the number of events per single temporary cache file")
    args = parser.parse_args()
    logger.info("Using configuration: %s steer file: %s", args.config_file, args.steer_file)

    with open(args.steer_file, "r", encoding="utf-8") as steer_data:
        default = yaml.safe_load(steer_data)
    with open(args.config_file, "r", encoding="utf-8") as config_data:
        config_parameters = yaml.safe_load(config_data)

    logger.info("Arguments provided: %s", str(args))
    if "dotrain" in args:
        default["dotrain"] = True
    if "docreatendvaldata" in args:
        default["docreatendvaldata"] = True
    if "docache" in args:
        default["docache"] = True

    if "rndaugment" in args:
        config_parameters["common"]["rnd_augment"] = True
    if "train_events_oned" in args:
        config_parameters["xgboost"]["train_events"] = [args.train_events_oned]
    if "nd_val_events" in args:
        config_parameters["common"]["nd_val_events"] = args.nd_val_events
    if "downsample_npoints" in args:
        config_parameters["xgboost"]["downsample"] = True
        config_parameters["xgboost"]["downsample_npoints"] = args.downsample_npoints
    if "n_estimators" in args:
        config_parameters["xgboost"]["params"]["n_estimators"] = args.n_estimators
    if "max_depth" in args:
        config_parameters["xgboost"]["params"]["max_depth"] = args.max_depth
    if "num_fft_idcs" in args:
        config_parameters["common"]["num_fft_idcs"] = args.num_fft_idcs
    if "num_fourier_coeffs_train" in args:
        config_parameters["common"]["num_fourier_coeffs_train"] = args.num_fourier_coeffs_train
    if "num_fourier_coeffs_apply" in args:
        config_parameters["common"]["num_fourier_coeffs_apply"] = args.num_fourier_coeffs_apply

    if "cache_events" in args:
        config_parameters["xgboost"]["cache_events"] = args.cache_events
    if "cache_train" in args:
        config_parameters["xgboost"]["cache_train"] = True
    if "cache_file_size" in args:
        config_parameters["xgboost"]["cache_file_size"] = args.cache_file_size

    models, corr, dataval = init_models(config_parameters)
    events_counts = (get_events_counts(config_parameters[model.name]["train_events"],
                                       config_parameters[model.name]["validation_events"],
                                       config_parameters[model.name]["apply_events"])
                     for model in models)

    ranges_rnd = config_parameters["common"]["range_rnd_index_train"]
    ranges_mean = config_parameters["common"]["range_mean_index"]
    if config_parameters["common"]["rnd_augment"]:
        max_available_events = (ranges_rnd[1] + 1 - ranges_rnd[0]) * \
                               (ranges_rnd[1] - ranges_rnd[0])
    else:
        max_available_events = (ranges_rnd[1] + 1 - ranges_rnd[0]) * \
                               (ranges_mean[1] + 1 - ranges_mean[0])

    for model in models:
        if default["docache"] is True and model.name == "xgboost":
            start = timer()
            model.cache_train_data()
            end = timer()
            log_time(start, end, "cache")

    for model, model_events_counts in zip(models, events_counts):
        all_events_counts = []
        for (train_events, val_events, apply_events) in model_events_counts:
            total_events = train_events + val_events + apply_events
            if total_events > max_available_events:
                logger.warning("Too many events requested: %d, available: %d",
                               total_events, max_available_events)
                continue
            all_events_counts.append((train_events, val_events, apply_events, total_events))
            ranges = {"train": [0, train_events],
                      "validation": [train_events, train_events + val_events],
                      "apply": [train_events + val_events, total_events]}
            model.config.set_ranges(ranges, total_events, train_events, val_events, apply_events)

            run_model_and_val(model, dataval, default, config_parameters["common"])
            # TODO: apply the correction and save in files
            if corr is not None:
                pass

        if default["doprofile"] is True:
            model.draw_profile(all_events_counts)

    logger.info("Program finished.")
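# Hedged usage sketch: an illustrative invocation of this entry point (the script
# name is an assumption; the flags are the ones defined above):
#
#   python steer_analysis.py --config config_model_parameters.yml --steer default.yml \
#       --dotrain --ntrain1d 1000 --nestimators 200 --maxdepth 6 --docache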
def __init__(self, data_param):
    """
    Read and store the parameters from the file.

    :param dict data_param: dictionary of values read from the config file
    """
    self.logger = get_logger()

    # Dataset config
    self.grid_phi = data_param["grid_phi"]
    self.grid_z = data_param["grid_z"]
    self.grid_r = data_param["grid_r"]
    self.z_range = data_param["z_range"]

    self.opt_train = data_param["opt_train"]
    self.opt_predout = data_param["opt_predout"]
    self.opt_usederivative = data_param["opt_usederivative"]
    self.nameopt_predout = data_param["nameopt_predout"]
    self.dim_input = sum(self.opt_train)
    self.dim_output = sum(self.opt_predout)

    self.num_fft_idcs = data_param["num_fft_idcs"]
    self.num_fourier_coeffs_train = data_param["num_fourier_coeffs_train"]
    self.num_fourier_coeffs_apply = data_param["num_fourier_coeffs_apply"]

    self.logger.info("Inputs active for training: (SCMean, SCFluctuations)=(%d, %d)",
                     self.opt_train[0], self.opt_train[1])
    self.logger.info("Outputs: (dR, dRPhi, dZ) = (%d, %d, %d)", self.opt_predout[0],
                     self.opt_predout[1], self.opt_predout[2])

    # Directories
    self.dirmodel = data_param["dirmodel"]
    self.dirapply = data_param["dirapply"]
    self.dirplots = data_param["dirplots"]
    self.dirtree = data_param["dirtree"]
    self.dirhist = data_param["dirhist"]
    train_dir = data_param["dirinput_bias"] if data_param["train_bias"] \
                else data_param["dirinput_nobias"]
    val_dir = data_param["dirinput_bias"] if data_param["validation_bias"] \
              else data_param["dirinput_nobias"]
    apply_dir = data_param["dirinput_bias"] if data_param["apply_bias"] \
                else data_param["dirinput_nobias"]
    self.dirinput_train = "%s/SC-%d-%d-%d" % \
                          (train_dir, self.grid_z, self.grid_r, self.grid_phi)
    self.dirinput_validation = "%s/SC-%d-%d-%d" % \
                               (val_dir, self.grid_z, self.grid_r, self.grid_phi)
    self.dirinput_apply = "%s/SC-%d-%d-%d" % \
                          (apply_dir, self.grid_z, self.grid_r, self.grid_phi)
    self.dirinput_nd_val = "%s/SC-%d-%d-%d" % \
                           (data_param["dirinput_nobias"], self.grid_z, self.grid_r, self.grid_phi)

    for dirname in (self.dirmodel, self.dirapply, self.dirplots, self.dirtree, self.dirhist):
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

    self.suffix = None
    self.suffix_ds = "phi%d_r%d_z%d" % (self.grid_phi, self.grid_r, self.grid_z)

    # Parameters for getting input indices
    self.maxrandomfiles = data_param["maxrandomfiles"]
    self.nd_val_events = data_param["nd_val_events"]
    self.range_rnd_index_train = data_param["range_rnd_index_train"]
    self.range_rnd_index_nd_val = data_param["range_rnd_index_nd_val"]
    self.rnd_augment = data_param["rnd_augment"]
    self.part_inds = None
    self.nd_val_partition = data_param["nd_val_partition"]
    self.range_mean_index = data_param["range_mean_index"]
    self.indices_events_means = None
    self.partition = None
    self.total_events = 0
    self.train_events = 0
    self.val_events = 0
    self.apply_events = 0
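# Hedged configuration sketch (illustrative values only): a fragment of the YAML
# section that feeds data_param, limited to keys actually read above:
#
#   grid_phi: 90
#   grid_r: 17
#   grid_z: 17
#   z_range: [0., 251.]
#   opt_train: [1, 1]
#   opt_predout: [1, 0, 0]
#   dirmodel: ./model_repository
#   maxrandomfiles: 500
#   rnd_augment: false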
def __init__(self):
    super().__init__()
    logger = get_logger()
    logger.info("IDCDataValidator::Init")
    self.model = None
    self.config = None
def __init__(self, data_param, case):
    self.logger = get_logger()
    self.logger.info("DnnOptimizer::Init\nCase: %s", case)

    # Dataset config
    self.grid_phi = data_param["grid_phi"]
    self.grid_z = data_param["grid_z"]
    self.grid_r = data_param["grid_r"]

    self.selopt_input = data_param["selopt_input"]
    self.selopt_output = data_param["selopt_output"]
    self.opt_train = data_param["opt_train"]
    self.opt_predout = data_param["opt_predout"]
    self.nameopt_predout = data_param["nameopt_predout"]
    self.dim_input = sum(self.opt_train)
    self.dim_output = sum(self.opt_predout)
    self.use_scaler = data_param["use_scaler"]

    # Directories
    self.dirmodel = data_param["dirmodel"]
    self.dirval = data_param["dirval"]
    train_dir = data_param["dirinput_bias"] if data_param["train_bias"] \
                else data_param["dirinput_nobias"]
    test_dir = data_param["dirinput_bias"] if data_param["test_bias"] \
               else data_param["dirinput_nobias"]
    apply_dir = data_param["dirinput_bias"] if data_param["apply_bias"] \
                else data_param["dirinput_nobias"]
    self.dirinput_train = "%s/SC-%d-%d-%d/" % \
                          (train_dir, self.grid_z, self.grid_r, self.grid_phi)
    self.dirinput_test = "%s/SC-%d-%d-%d/" % \
                         (test_dir, self.grid_z, self.grid_r, self.grid_phi)
    self.dirinput_apply = "%s/SC-%d-%d-%d/" % \
                          (apply_dir, self.grid_z, self.grid_r, self.grid_phi)

    # DNN config
    self.filters = data_param["filters"]
    self.pooling = data_param["pooling"]
    self.batch_size = data_param["batch_size"]
    self.shuffle = data_param["shuffle"]
    self.depth = data_param["depth"]
    self.batch_normalization = data_param["batch_normalization"]
    self.dropout = data_param["dropout"]
    self.epochs = data_param["epochs"]
    self.lossfun = data_param["lossfun"]
    self.metrics = data_param["metrics"]
    self.adamlr = data_param["adamlr"]

    self.params = {'phi_slice': self.grid_phi,
                   'r_row': self.grid_r,
                   'z_col': self.grid_z,
                   'batch_size': self.batch_size,
                   'shuffle': self.shuffle,
                   'opt_train': self.opt_train,
                   'opt_predout': self.opt_predout,
                   'selopt_input': self.selopt_input,
                   'selopt_output': self.selopt_output,
                   'use_scaler': self.use_scaler}

    self.suffix = "phi%d_r%d_z%d_filter%d_poo%d_drop%.2f_depth%d_batch%d_scaler%d" % \
                  (self.grid_phi, self.grid_r, self.grid_z, self.filters, self.pooling,
                   self.dropout, self.depth, self.batch_normalization, self.use_scaler)
    self.suffix = "%s_useSCMean%d_useSCFluc%d" % \
                  (self.suffix, self.opt_train[0], self.opt_train[1])
    self.suffix = "%s_pred_doR%d_dophi%d_doz%d" % \
                  (self.suffix, self.opt_predout[0], self.opt_predout[1], self.opt_predout[2])
    self.suffix_ds = "phi%d_r%d_z%d" % (self.grid_phi, self.grid_r, self.grid_z)

    if not os.path.isdir("plots"):
        os.makedirs("plots")
    if not os.path.isdir(self.dirmodel):
        os.makedirs(self.dirmodel)
    if not os.path.isdir(self.dirval):
        os.makedirs(self.dirval)

    self.logger.info("I am processing the configuration %s", self.suffix)
    if self.dim_output > 1:
        self.logger.fatal("YOU CAN PREDICT ONLY 1 DISTORTION. The sum of opt_predout must be 1.")
    self.logger.info("Inputs active for training: (SCMean, SCFluctuations)=(%d, %d)",
                     self.opt_train[0], self.opt_train[1])

    # Parameters for getting input indices
    self.maxrandomfiles = data_param["maxrandomfiles"]
    self.range_mean_index = data_param["range_mean_index"]
    self.indices_events_means = None
    self.partition = None
    self.total_events = 0
    self.train_events = 0
    self.test_events = 0
    self.apply_events = 0

    gROOT.SetStyle("Plain")
    gROOT.SetBatch()
def merge_root_file(target, source_list):
    """
    Merge the next file from the source list with the target file.
    The function is called recursively for each element of the list.

    :param TFile target: the result ROOT file
    :param TList source_list: list of input files to merge
    """
    logger = get_logger()
    raw_path = target.GetPath()
    path = raw_path[raw_path.find(":") + 1:]

    first_source = source_list.First()
    first_source.cd(path)
    current_source_dir = gDirectory
    # gain time, do not add the objects in the list in memory
    status = TH1.AddDirectoryStatus()
    TH1.AddDirectory(False)

    # loop over all keys in this directory
    next_key = TIter(current_source_dir.GetListOfKeys())
    #old_key = None
    key = next_key()
    while key:
        # keep only the highest cycle number for each key
        #if old_key and not old_key.GetName() == key.GetName():
        #    continue

        # read the object from the first source file
        first_source.cd(path)
        obj = key.ReadObj()

        if obj.IsA().InheritsFrom(TH1.Class()):
            # descendant of TH1 -> merge it
            logger.info("Merging histogram %s", obj.GetName())
            h1 = obj  # the C++ original casts to TH1*; no cast is needed in Python

            # loop over all source files and add the content of the
            # corresponding histogram to the one pointed to by "h1"
            next_source = source_list.After(first_source)
            while next_source:
                # make sure we are at the correct directory level by cd'ing to path
                next_source.cd(path)
                key2 = gDirectory.GetListOfKeys().FindObject(h1.GetName())
                if key2:
                    h2 = key2.ReadObj()
                    h1.Add(h2)
                next_source = source_list.After(next_source)
        elif obj.IsA().InheritsFrom(TTree.Class()):
            logger.info("Merging tree %s", obj.GetName())
            # loop over all source files and create a chain of trees "global_chain"
            obj_name = obj.GetName()
            global_chain = TChain(obj_name)
            global_chain.Add(first_source.GetName())
            next_source = source_list.After(first_source)
            while next_source:
                global_chain.Add(next_source.GetName())
                next_source = source_list.After(next_source)
        elif obj.IsA().InheritsFrom(TDirectory.Class()):
            logger.info("Found subdirectory %s", obj.GetName())
            # create a new subdir of same name and title in the target file
            target.cd()
            new_dir = target.mkdir(obj.GetName(), obj.GetTitle())
            # new_dir is now the starting point of another round of merging.
            # new_dir still knows its depth within the target file via
            # GetPath(), so we can still figure out where we are in the recursion.
            merge_root_file(new_dir, source_list)
        else:
            logger.info("Unknown object type, name: %s, title: %s",
                        obj.GetName(), obj.GetTitle())

        # now write the merged histogram (which is "in" obj) to the target file.
        # note that this will just store obj in the current directory level,
        # which is not persistent until the complete directory itself is stored
        # by "target.Write()" below
        if obj is not None:
            target.cd()
            # if the object is a tree, it is stored in global_chain...
            if obj.IsA().InheritsFrom(TTree.Class()):
                global_chain.Merge(target.GetFile(), 0, "keep")
            else:
                obj.Write(key.GetName())

        # move to the next element
        key = next_key()

    # save modifications to the target file
    target.SaveSelf(True)
    TH1.AddDirectory(status)
    target.Write()
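# Hedged usage sketch (not from the original sources): the caller is expected to open
# the target file and collect the sources in a TList, roughly as below. File names
# are illustrative.
def _example_merge_root_file():
    from ROOT import TFile, TList  # pylint: disable=import-outside-toplevel
    target = TFile.Open("merged.root", "RECREATE")
    sources = TList()
    sources.Add(TFile.Open("input_part0.root"))
    sources.Add(TFile.Open("input_part1.root"))
    merge_root_file(target, sources)
    target.Close()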
def __init__(self, data_param, case):
    self.logger = get_logger()
    self.logger.info("DataValidator::Init\nCase: %s", case)

    # Dataset config
    self.grid_phi = data_param["grid_phi"]
    self.grid_z = data_param["grid_z"]
    self.grid_r = data_param["grid_r"]

    self.selopt_input = data_param["selopt_input"]
    self.selopt_output = data_param["selopt_output"]
    self.opt_train = data_param["opt_train"]
    self.opt_predout = data_param["opt_predout"]
    self.nameopt_predout = data_param["nameopt_predout"]
    self.dim_input = sum(self.opt_train)
    self.dim_output = sum(self.opt_predout)
    self.validate_model = data_param["validate_model"]
    self.use_scaler = data_param["use_scaler"]

    # Directories
    self.dirmodel = data_param["dirmodel"]
    self.dirval = data_param["dirval"]
    self.diroutflattree = data_param["diroutflattree"]
    self.dirouthistograms = data_param["dirouthistograms"]
    train_dir = data_param["dirinput_bias"] if data_param["train_bias"] \
                else data_param["dirinput_nobias"]
    test_dir = data_param["dirinput_bias"] if data_param["test_bias"] \
               else data_param["dirinput_nobias"]
    apply_dir = data_param["dirinput_bias"] if data_param["apply_bias"] \
                else data_param["dirinput_nobias"]
    self.dirinput_train = "%s/SC-%d-%d-%d/" % \
                          (train_dir, self.grid_z, self.grid_r, self.grid_phi)
    self.dirinput_test = "%s/SC-%d-%d-%d/" % \
                         (test_dir, self.grid_z, self.grid_r, self.grid_phi)
    self.dirinput_apply = "%s/SC-%d-%d-%d/" % \
                          (apply_dir, self.grid_z, self.grid_r, self.grid_phi)
    self.dirinput_val = "%s/SC-%d-%d-%d/" % \
                        (data_param["dirinput_nobias"], self.grid_z, self.grid_r, self.grid_phi)

    # DNN config
    self.filters = data_param["filters"]
    self.pooling = data_param["pooling"]
    self.depth = data_param["depth"]
    self.batch_normalization = data_param["batch_normalization"]
    self.dropout = data_param["dropout"]

    self.suffix = "phi%d_r%d_z%d_filter%d_poo%d_drop%.2f_depth%d_batch%d_scaler%d" % \
                  (self.grid_phi, self.grid_r, self.grid_z, self.filters, self.pooling,
                   self.dropout, self.depth, self.batch_normalization, self.use_scaler)
    self.suffix = "%s_useSCMean%d_useSCFluc%d" % \
                  (self.suffix, self.opt_train[0], self.opt_train[1])
    self.suffix = "%s_pred_doR%d_dophi%d_doz%d" % \
                  (self.suffix, self.opt_predout[0], self.opt_predout[1], self.opt_predout[2])
    self.suffix_ds = "phi%d_r%d_z%d" % (self.grid_phi, self.grid_r, self.grid_z)

    self.logger.info("I am processing the configuration %s", self.suffix)
    if self.dim_output > 1:
        self.logger.fatal("YOU CAN PREDICT ONLY 1 DISTORTION. The sum of opt_predout must be 1.")
    self.logger.info("Inputs active for training: (SCMean, SCFluctuations)=(%d, %d)",
                     self.opt_train[0], self.opt_train[1])

    # Parameters for getting input indices
    self.maxrandomfiles = data_param["maxrandomfiles"]
    self.range_mean_index = data_param["range_mean_index"]
    self.indices_events_means = None
    self.partition = None
    self.total_events = 0
    self.train_events = 0
    self.test_events = 0
    self.apply_events = 0

    self.tree_events = data_param["tree_events"]

    if not os.path.isdir(self.diroutflattree):
        os.makedirs(self.diroutflattree)
    if not os.path.isdir("%s/%s" % (self.diroutflattree, self.suffix)):
        os.makedirs("%s/%s" % (self.diroutflattree, self.suffix))
    if not os.path.isdir("%s/%s" % (self.dirouthistograms, self.suffix)):
        os.makedirs("%s/%s" % (self.dirouthistograms, self.suffix))
def main():
    """ The global main function """
    logger = get_logger()
    logger.info("Starting TPC ML...")

    if len(sys.argv) == 2:
        default_file_name = sys.argv[1]
        print("Using user specified steering options file: %s" % default_file_name)
    else:
        default_file_name = "default.yml"
    with open(default_file_name, 'r') as default_data:
        default = yaml.safe_load(default_data)
    with open("config_model_parameters.yml", 'r') as parameters_data:
        config_parameters = yaml.safe_load(parameters_data)

    # FIXME: Do we need these commented lines anymore?
    #dirmodel = config_parameters["common"]["dirmodel"]
    #dirval = config_parameters["common"]["dirval"]
    #dirinput = config_parameters["common"]["dirinput"]

    # NOTE
    # checkdir and checkmakedir are not yet implemented. They were previously used from
    # the machine_learning_hep package and are now the only thing required from there.
    # It is easy to adapt an implementation like the one below to avoid a heavy
    # dependency on machine_learning_hep.
    #counter = 0
    #if dotraining is True:
    #    counter = counter + checkdir(dirmodel)
    #if dotesting is True:
    #    counter = counter + checkdir(dirval)
    #if counter < 0:
    #    sys.exit()
    #if dotraining is True:
    #    checkmakedir(dirmodel)
    #if dotesting is True:
    #    checkmakedir(dirval)

    models, corr, dataval = init_models(config_parameters)
    events_counts = (get_events_counts(config_parameters[model.name]["train_events"],
                                       config_parameters[model.name]["test_events"],
                                       config_parameters[model.name]["apply_events"])
                     for model in models)
    max_available_events = config_parameters["common"]["max_events"]

    for model, model_events_counts in zip(models, events_counts):
        all_events_counts = []
        for (train_events, test_events, apply_events) in model_events_counts:
            total_events = train_events + test_events + apply_events
            if total_events > max_available_events:
                print("Too many events requested: %d, available: %d" % \
                      (total_events, max_available_events))
                continue
            all_events_counts.append((train_events, test_events, apply_events, total_events))
            ranges = {"train": [0, train_events],
                      "test": [train_events, train_events + test_events],
                      "apply": [train_events + test_events, total_events]}
            model.config.set_ranges(ranges, total_events, train_events, test_events, apply_events)

            run_model_and_val(model, dataval, default, config_parameters["common"])
            # TODO: apply the correction and save in files
            if corr is not None:
                pass

        if default["doprofile"] is True:
            model.draw_profile(all_events_counts)

    logger.info("Program finished.")