from DeepJetCore.dataPipeline import TrainDataGenerator

infile = args.infile
nbatch = int(args.nelementsperfile)
randomise = args.randomise

dc = DataCollection(infile)
dc2 = DataCollection(infile)
samples = dc.samples
dir = dc.dataDir
if len(dir) < 1:
    dir = '.'
insamples = [dir + '/' + s for s in samples]

gen = TrainDataGenerator()
gen.setBatchSize(nbatch)
gen.setSkipTooLargeBatches(False)
gen.setFileList(insamples)
if randomise:
    gen.shuffleFileList()

nbatches = gen.getNBatches()

newsamples = []
for i in range(nbatches):
    newname = str(samples[0][:-6] + "_n_" + str(i) + ".djctd")
    newsamples.append(newname)
    ntd = gen.getBatch()
    print(newname)
use_inputdir = ""
outfilename = "pred_" + os.path.basename(inputfile)

td = dc.dataclass()

if inputfile[-5:] == 'djctd':
    if args.unbuffered:
        td.readFromFile(use_inputdir + "/" + inputfile)
    else:
        td.readFromFileBuffered(use_inputdir + "/" + inputfile)
else:
    print('converting ' + inputfile)
    td.readFromSourceFile(use_inputdir + "/" + inputfile,
                          dc.weighterobjects,
                          istraining=False)

gen = TrainDataGenerator()
if batchsize < 1:
    batchsize = dc.getBatchSize()
print('batch size', batchsize)
gen.setBatchSize(batchsize)
gen.setSquaredElementsLimit(dc.batch_uses_sum_of_squares)
gen.setSkipTooLargeBatches(False)
gen.setBuffer(td)

predicted = model.predict_generator(gen.feedNumpyData(),
                                    steps=gen.getNBatches(),
                                    max_queue_size=1,
                                    use_multiprocessing=False,
                                    verbose=1)

x = td.transferFeatureListToNumpy(args.pad_rowsplits)
def invokeGen(infile):
    if infile[-6:] == '.djcdc':
        dc = DataCollection(infile)
        td = dc.dataclass()
        tdclass = dc.dataclass
        dc.setBatchSize(1)
        gen = dc.invokeGenerator()
    elif infile[-6:] == '.djctd':
        td = TrainData_NanoML()
        tdclass = TrainData_NanoML
        td.readFromFile(infile)
        gen = TrainDataGenerator()
        gen.setBatchSize(1)
        gen.setBuffer(td)
    elif infile[-5:] == '.root':
        print('reading from root file')
        td = TrainData_NanoML()
        tdclass = TrainData_NanoML
        td.readFromSourceFile(infile, {}, True)
        td.writeToFile(infile + '.djctd')
        td.readFromFile(infile + '.djctd')
        gen = TrainDataGenerator()
        gen.setBatchSize(1)
        gen.setBuffer(td)

    gen.setSkipTooLargeBatches(False)
    nevents = gen.getNBatches()
    gen.cast_to = tdclass
    return gen.feedTrainData, nevents, td
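# Hedged usage sketch (not from the original sources): how the triple returned by
# invokeGen above might be consumed. The file name 'events.djctd' and the loop are
# placeholders for illustration; feedTrainData is assumed to behave like feedNumpyData
# in the other snippets, i.e. calling it returns an iterator over batches (of size 1 here).
gen_func, nevents, td = invokeGen('events.djctd')  # placeholder input file
feeder = gen_func()
for _ in range(nevents):
    event_td = next(feeder)  # one event per iteration, since the batch size is set to 1
    # ... inspect or process the event here ...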
class PredictCallback(Callback):

    def __init__(self,
                 samplefile,
                 function_to_apply=None,  # needs to be function(counter, [model_input], [predict_output], [truth])
                 after_n_batches=50,
                 batchsize=10,
                 on_epoch_end=False,
                 use_event=0,
                 decay_function=None,
                 offset=0):
        super(PredictCallback, self).__init__()
        self.samplefile = samplefile
        self.function_to_apply = function_to_apply
        self.counter = 0
        self.call_counter = offset
        self.decay_function = decay_function

        self.after_n_batches = after_n_batches
        self.run_on_epoch_end = on_epoch_end

        if self.run_on_epoch_end and self.after_n_batches >= 0:
            print('PredictCallback: can only be used on epoch end OR after n batches, falling back to epoch end')
            self.after_n_batches = 0

        td = TrainData()
        td.readFromFile(samplefile)
        if use_event >= 0:
            td.skim(use_event)

        self.batchsize = 1
        self.td = td
        self.gen = TrainDataGenerator()
        self.gen.setBatchSize(batchsize)
        self.gen.setSkipTooLargeBatches(False)

    def reset(self):
        self.call_counter = 0

    def predict_and_call(self, counter):
        self.gen.setBuffer(self.td)

        predicted = self.model.predict_generator(self.gen.feedNumpyData(),
                                                 steps=self.gen.getNBatches(),
                                                 max_queue_size=1,
                                                 use_multiprocessing=False,
                                                 verbose=2)
        if not isinstance(predicted, list):
            predicted = [predicted]

        self.function_to_apply(self.call_counter,
                               self.td.copyFeatureListToNumpy(),
                               predicted,
                               self.td.copyTruthListToNumpy())
        self.call_counter += 1

    def on_epoch_end(self, epoch, logs=None):
        self.counter = 0
        if not self.run_on_epoch_end:
            return
        self.predict_and_call(epoch)

    def on_batch_end(self, batch, logs=None):
        if self.after_n_batches <= 0:
            return
        self.counter += 1
        if self.counter > self.after_n_batches:
            self.counter = 0
            self.predict_and_call(batch)
            if self.decay_function is not None:
                self.after_n_batches = self.decay_function(self.call_counter)
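# Hedged usage sketch (not from the original sources): how PredictCallback might be
# attached to a Keras training run. The sample file, the model, and write_out are
# placeholders; only the constructor arguments come from the class above, and the
# callbacks= argument is standard Keras.
def write_out(counter, features, predictions, truth):
    # user-defined hook: receives the numpy feature/truth lists and the model predictions
    print('prediction round', counter, 'with', len(predictions), 'output arrays')

pred_cb = PredictCallback('validation_sample.djctd',  # placeholder validation file
                          function_to_apply=write_out,
                          after_n_batches=100,
                          batchsize=10)
# model.fit(..., callbacks=[pred_cb])  # pass alongside any other callbacks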
def invokeGenerator(self):
    generator = TrainDataGenerator()
    generator.setBatchSize(self.__batchsize)
    generator.setSquaredElementsLimit(self.batch_uses_sum_of_squares)
    generator.setFileList([self.dataDir + "/" + s for s in self.samples])
    return generator
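# Hedged usage sketch (not from the original sources): the pattern around
# DataCollection.invokeGenerator as it appears in the other snippets here
# (setBatchSize before invoking, then getNBatches/feedNumpyData to iterate).
# The .djcdc file name is a placeholder.
dc = DataCollection('dataCollection.djcdc')  # placeholder data collection file
dc.setBatchSize(1)
gen = dc.invokeGenerator()
gen.setSkipTooLargeBatches(False)

feeder = gen.feedNumpyData()
for _ in range(gen.getNBatches()):
    data_in = next(feeder)  # data_in[0] holds the feature list, as in the predict() snippet below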
from DeepJetCore.TrainData import TrainData
from DeepJetCore.dataPipeline import TrainDataGenerator
from LayersRagged import RaggedConstructTensor
import index_dicts
import tensorflow as tf
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

td = TrainData()
td.readFromFile('/eos/cms/store/cmst3/group/hgcal/CMG_studies/pepr/50_part_with_noise_Jul2020/converted/HGCalML_data/50_part_with_noise_Jul2020/988_windowntup.djctd')

gen = TrainDataGenerator()
gen.setBatchSize(100000)
gen.setSkipTooLargeBatches(False)
gen.setBuffer(td)

with tf.device('/CPU:0'):
    ragged_constructor = RaggedConstructTensor()

while True:
    feat, truth = next(gen.feedNumpyData())  # this is [ [features], [truth], [None] ]
    if gen.lastBatch():
        break
    row_splits = feat[1][:, 0]
def predict(self, model=None, model_path=None, output_to_file=True):
    if model_path is None:
        model_path = self.model_path

    if model is None:
        if not os.path.exists(model_path):
            raise FileNotFoundError('Model file not found')

    assert model_path is not None or model is not None

    outputs = []
    if output_to_file:
        os.system('mkdir -p ' + self.predict_dir)

    if model is None:
        model = load_model(model_path)

    all_data = []
    for inputfile in self.input_data_files:

        use_inputdir = self.inputdir
        if inputfile[0] == "/":
            use_inputdir = ""
        outfilename = "pred_" + os.path.basename(inputfile)

        print('predicting ', use_inputdir + '/' + inputfile)

        td = self.dc.dataclass()

        # also allows for inheriting classes now, like with tracks or special PU
        if not isinstance(td, TrainData_NanoML) and type(td) is not TrainData_TrackML:
            raise RuntimeError("TODO: make sure this works for other traindata formats")

        if inputfile[-5:] == 'djctd':
            if self.unbuffered:
                td.readFromFile(use_inputdir + "/" + inputfile)
            else:
                td.readFromFileBuffered(use_inputdir + "/" + inputfile)
        else:
            print('converting ' + inputfile)
            td.readFromSourceFile(use_inputdir + "/" + inputfile,
                                  self.dc.weighterobjects,
                                  istraining=False)

        gen = TrainDataGenerator()
        # the batch size must be one, otherwise we need to play tricks with the row splits later on
        gen.setBatchSize(1)
        gen.setSquaredElementsLimit(False)
        gen.setSkipTooLargeBatches(False)
        gen.setBuffer(td)

        num_steps = gen.getNBatches()
        generator = gen.feedNumpyData()

        dumping_data = []

        thistime = time.time()
        for _ in range(num_steps):
            data_in = next(generator)
            predictions_dict = model(data_in[0])
            for k in predictions_dict.keys():
                predictions_dict[k] = predictions_dict[k].numpy()
            features_dict = td.createFeatureDict(data_in[0])
            truth_dict = td.createTruthDict(data_in[0])

            dumping_data.append([features_dict, truth_dict, predictions_dict])

        totaltime = time.time() - thistime
        print('took approx', totaltime / num_steps, 's per endcap (also includes dict building)')

        td.clear()
        gen.clear()
        outfilename = os.path.splitext(outfilename)[0] + '.bin.gz'
        if output_to_file:
            td.writeOutPredictionDict(dumping_data, self.predict_dir + "/" + outfilename)
        outputs.append(outfilename)
        if not output_to_file:
            all_data.append(dumping_data)

    if output_to_file:
        with open(self.predict_dir + "/outfiles.txt", "w") as f:
            for l in outputs:
                f.write(l + '\n')

    if not output_to_file:
        return all_data
class plotRunningPerformanceMetrics(Callback):

    def __init__(self,
                 samplefile,
                 accumulate_after_batches=5,
                 plot_after_batches=50,
                 batchsize=10,
                 beta_threshold=0.6,
                 distance_threshold=0.6,
                 iou_threshold=0.1,
                 n_windows_for_plots=5,
                 n_windows_for_scalar_metrics=5000000,
                 outputdir=None,
                 publish=None,
                 n_ccoords=None,
                 n_average_over_samples=5,
                 ):
        """
        :param samplefile: the file to pick validation data from
        :param accumulate_after_batches: run performance metrics after n batches (a good value is 5)
        :param plot_after_batches: update and upload plots after n batches
        :param batchsize: batch size
        :param beta_threshold: beta threshold for running prediction on obc
        :param distance_threshold: distance threshold for running prediction on obc
        :param iou_threshold: iou threshold used for matching, both for obc and for ticl
        :param n_windows_for_plots: how many windows to average over for the running performance plots
        :param n_windows_for_scalar_metrics: the maximum number of windows to store for scalar performance metrics as a function of iteration
        :param outputdir: the output directory where to store results
        :param publish: where to publish, can be an ssh'able path
        :param n_ccoords: number of cluster coordinates for plots
        :param n_average_over_samples: average scalar metrics over this many samples
        """
        super(plotRunningPerformanceMetrics, self).__init__()
        self.samplefile = samplefile
        self.counter = 0
        self.call_counter = 0
        self.decay_function = None
        self.outputdir = outputdir
        self.n_ccords = n_ccoords
        self.publish = publish
        self.accumulate_after_batches = accumulate_after_batches
        self.plot_after_batches = plot_after_batches
        self.run_on_epoch_end = False

        if self.run_on_epoch_end and self.accumulate_after_batches >= 0:
            print('PredictCallback: can only be used on epoch end OR after n batches, falling back to epoch end')
            self.accumulate_after_batches = 0

        td = TrainData()
        td.readFromFile(samplefile)
        # td_selected = td.split(self.n_events)  # check if this works in ragged out of the box
        # if use_event >= 0:
        #     if use_event < td.nElements():
        #         td.skim(use_event)
        #     else:
        #         td.skim(use_event % td.nElements())

        self.batchsize = batchsize
        self.td = td
        self.gen = TrainDataGenerator()
        self.gen.setBatchSize(self.batchsize)
        self.gen.setSkipTooLargeBatches(False)
        self.gen.setBuffer(td)
        self.n_batches = self.gen.getNBatches()

        with tf.device('/CPU:0'):
            self.ragged_constructor = RaggedConstructTensor()

        self.window_id = 0
        self.window_analysis_dicts = []
        self.n_windows_for_plots = n_windows_for_plots
        self.n_windows_for_scalar_metrics = n_windows_for_scalar_metrics

        self.beta_threshold = beta_threshold
        self.distance_threshold = distance_threshold
        self.iou_threshold = iou_threshold

        self.scalar_metrics = dict()
        self.scalar_metrics['efficiency'] = []
        self.scalar_metrics['efficiency_ticl'] = []
        self.scalar_metrics['fake_rate'] = []
        self.scalar_metrics['fake_rate_ticl'] = []
        self.scalar_metrics['var_response'] = []
        self.scalar_metrics['var_response_ticl'] = []
        self.scalar_metrics['iteration'] = []

        self.n_average_over_samples = n_average_over_samples

        self.plot_process = None

    def reset(self):
        self.call_counter = 0

    def predict_and_call(self, counter):
        feat, truth = next(self.gen.feedNumpyData())  # this is [ [features], [truth], [None] ]
        if self.gen.lastBatch():
            self.gen.setBuffer(self.td)
            # self.gen.prepareNextEpoch()

        def dummy_gen():
            yield (feat, truth)

        predicted = self.model.predict_generator(dummy_gen(),
                                                 steps=1,
                                                 max_queue_size=1,
                                                 use_multiprocessing=False,
                                                 verbose=2)

        self.accumulate(self.counter, feat, predicted, truth)
        self.call_counter += 1

    def on_epoch_end(self, epoch, logs=None):
        self.counter = 0
        if not self.run_on_epoch_end:
            return
        self.predict_and_call(epoch)

    def on_batch_end(self, batch, logs=None):
        if self.accumulate_after_batches <= 0:
            return
        if self.counter % self.accumulate_after_batches == 0:
            self.predict_and_call(batch)
        if self.plot_after_batches > 0:
            if self.counter % self.plot_after_batches == 0:
                self.plot()
        self.counter += 1

    def plot(self):
        if self.plot_process is not None:
            self.plot_process.join()

        self.plot_process = Process(target=self._plot,
                                    args=(copy.deepcopy(self.window_analysis_dicts),
                                          copy.deepcopy(self.scalar_metrics)))
        self.plot_process.start()

    def _plot(self, window_analysis_dicts, scalar_metrics):
        with tf.device('/CPU:0'):
            if len(window_analysis_dicts) == self.n_windows_for_plots:
                print("Plotting and publishing")
                dataset_analysis_dict = build_dataset_analysis_dict()
                dataset_analysis_dict['beta_threshold'] = self.beta_threshold
                dataset_analysis_dict['distance_threshold'] = self.distance_threshold
                dataset_analysis_dict['iou_threshold'] = self.iou_threshold
                for x in window_analysis_dicts:
                    dataset_analysis_dict = append_window_dict_to_dataset_dict(dataset_analysis_dict, x)

                make_running_plots(self.outputdir,
                                   dataset_analysis_dict,
                                   scalar_metrics,
                                   self.n_average_over_samples,
                                   get_analysis_plotting_configuration('standard_hgcal_with_ticl'))

                if self.publish is not None:
                    for f in os.listdir(self.outputdir):
                        if f.endswith('.png'):
                            f_full = os.path.join(self.outputdir, f)
                            cpstring = 'cp -f '
                            if "@" in self.publish:
                                cpstring = 'scp '
                            s = (cpstring + f_full + ' ' + self.publish + f + ' > /dev/null')
                            os.system(s)

    def accumulate(self, counter, feat, predicted, truth):
        print("Accumulating")
        with tf.device('/CPU:0'):
            new_window_analysis_dicts = self.analyse_one_file(feat, predicted, truth)
            self.window_analysis_dicts += new_window_analysis_dicts

            for i, wdict in enumerate(new_window_analysis_dicts):
                efficiency = float(wdict['window_num_found_showers']) / wdict['window_num_truth_showers']
                efficiency_ticl = float(wdict['window_num_found_showers_ticl']) / wdict['window_num_truth_showers']
                fake_rate = float(wdict['window_num_fake_showers']) / wdict['window_num_pred_showers']
                fake_rate_ticl = float(wdict['window_num_fake_showers_ticl']) / wdict['window_num_ticl_showers']

                truth_shower_energy = np.array(wdict['truth_shower_energy'])
                pred_shower_energy = np.array(wdict['truth_shower_matched_energy_regressed'])
                ticl_shower_energy = np.array(wdict['truth_shower_matched_energy_regressed_ticl'])

                filter = pred_shower_energy != -1
                filter_ticl = ticl_shower_energy != -1

                var_res = pred_shower_energy[filter] / truth_shower_energy[filter]
                var_res = np.std(var_res) / np.mean(var_res)

                var_res_ticl = ticl_shower_energy[filter_ticl] / truth_shower_energy[filter_ticl]
                var_res_ticl = np.std(var_res_ticl) / np.mean(var_res_ticl)

                iteration = counter + float(i + 1) / float(len(new_window_analysis_dicts))

                self.scalar_metrics['efficiency'].append(efficiency)
                self.scalar_metrics['efficiency_ticl'].append(efficiency_ticl)
                self.scalar_metrics['fake_rate'].append(fake_rate)
                self.scalar_metrics['fake_rate_ticl'].append(fake_rate_ticl)
                self.scalar_metrics['var_response'].append(var_res)
                self.scalar_metrics['var_response_ticl'].append(var_res_ticl)
                self.scalar_metrics['iteration'].append(iteration)

            while len(self.window_analysis_dicts) > self.n_windows_for_plots:
                self.window_analysis_dicts.pop(0)

            while len(self.scalar_metrics['iteration']) > self.n_windows_for_scalar_metrics:
                # the original popped from self.n_windows_for_scalar_metrics (an int), which would fail;
                # drop the oldest entry from each metric list instead to keep them in sync
                for v in self.scalar_metrics.values():
                    v.pop(0)

    def analyse_one_file(self, _features, predictions, truth_in, soft=False):
        predictions = tf.constant(predictions)

        row_splits = _features[1][:, 0]

        features, _ = self.ragged_constructor((_features[0], row_splits))
        truth, row_splits = self.ragged_constructor((_features[2], row_splits))

        hit_assigned_truth_id = truth[:, 0:1]

        # make 100% sure the cast doesn't hit the fan
        hit_assigned_truth_id = tf.where(hit_assigned_truth_id < -0.1,
                                         hit_assigned_truth_id - 0.1,
                                         hit_assigned_truth_id + 0.1)
        hit_assigned_truth_id = tf.cast(hit_assigned_truth_id[:, 0], tf.int32)

        window_analysis_dicts = []
        for i in range(len(row_splits) - 1):
            hit_assigned_truth_id_s = hit_assigned_truth_id[row_splits[i]:row_splits[i + 1]].numpy()
            features_s = features[row_splits[i]:row_splits[i + 1]].numpy()
            truth_s = truth[row_splits[i]:row_splits[i + 1]].numpy()
            prediction_s = predictions[row_splits[i]:row_splits[i + 1]].numpy()

            window_analysis_dict = analyse_one_window_cut(hit_assigned_truth_id_s,
                                                          features_s,
                                                          truth_s,
                                                          prediction_s,
                                                          self.beta_threshold,
                                                          self.distance_threshold,
                                                          self.iou_threshold,
                                                          self.window_id,
                                                          False,
                                                          soft=soft)
            window_analysis_dicts.append(window_analysis_dict)
            # append_window_dict_to_dataset_dict(dataset_analysis_dict, window_analysis_dict)
            # num_visualized_segments += 1
            self.window_id += 1

        return window_analysis_dicts