def track_eff_of_edge_selected(evtid, config_name, matching_cut=0.8, remove_duplicated_hits=False):
    """Compute per-particle track-efficiency inputs for one event.

    Reads the pre-selected doublet files produced by the 'doublets_from_cuts'
    stage, assembles them into a graph, extracts track candidates and matches
    them back to truth particles.

    Args:
        evtid: event number to process.
        config_name: path to the YAML configuration file.
        matching_cut: minimum hit-matching fraction for a track candidate to
            count as correctly reconstructed.
        remove_duplicated_hits: kept for backward compatibility; currently
            unused.  NOTE(review): accepted but never read by this function.

    Returns:
        Tuple ``(pT_all, pT_sel)`` of numpy arrays: pT of all matchable truth
        particles (non-noise, more than 2 barrel hits) and pT of correctly
        reconstructed ones.

    Raises:
        RuntimeError: if no selected doublet file could be read for the event.
    """
    config = load_yaml(config_name)
    evt_dir = config['track_ml']['dir']
    layers = config['doublets_from_cuts']['layers']
    sel_layer_id = select_pair_layers(layers)

    event = Event(evt_dir)
    event.read(evtid)
    barrel_hits = event.filter_hits(layers)

    data_source = 'doublets_from_cuts'
    cfg = config[data_source]
    pairs_selected_dir = cfg['selected']
    pairs_input_dir = os.path.join(pairs_selected_dir, 'evt{}'.format(evtid))

    all_segments = []
    for pair_id in sel_layer_id:
        file_name = os.path.join(pairs_input_dir, 'pair{:03d}.h5'.format(pair_id))
        # A pair file may be absent entirely (OSError) or present but missing
        # the 'data' key (KeyError). The original caught only KeyError, so a
        # missing file crashed the whole scan; skip both cases.
        try:
            with pd.HDFStore(file_name, 'r') as store:
                df = store.get('data')
        except (KeyError, OSError):
            continue
        all_segments.append(df)

    if not all_segments:
        # pd.concat([]) raises an opaque ValueError; fail with context instead.
        raise RuntimeError(
            'no selected doublets found in {}'.format(pairs_input_dir))
    segments = pd.concat(all_segments, ignore_index=True)

    graph = utils_data.segments_to_nx(
        barrel_hits, segments,
        sender_hitid_name='hit_id_in',
        receiver_hitid_name='hit_id_out',
        solution_name='true',
        use_digraph=True, bidirection=False)
    track_cands = wrangler.get_tracks(graph, feature_name='solution', with_fit=False)
    df_track_cands = analysis.graphs_to_df(track_cands)
    summary = analysis.summary_on_prediction(
        graph, barrel_hits, df_track_cands, matching_cut=matching_cut)

    # Matchable truth particles: non-noise (particle_id > 0) with >2 barrel hits.
    true_nhits = barrel_hits[barrel_hits.particle_id > 0].groupby(
        'particle_id')['hit_id'].count()
    true_particle_ids = true_nhits[true_nhits > 2].index.to_numpy()

    particles = event.particles
    pT_all = particles[particles.particle_id.isin(true_particle_ids)].pt.to_numpy()
    pT_sel = particles[particles.particle_id.isin(summary['correct_pids'])].pt.to_numpy()
    return pT_all, pT_sel
def setup_from_config(self, config_dir):
    """Read the YAML configuration at *config_dir* and forward the relevant
    'doublets_from_cuts' settings to :meth:`setup`."""
    cfg = load_yaml(config_dir)
    cuts = cfg['doublets_from_cuts']
    self.setup(
        cfg['track_ml']['dir'],
        cuts['phi_slope_max'],
        cuts['z0_max'],
        cuts['layers'],
        cuts['min_hits'],
        cuts['selected'])
def fraction_of_duplicated_hits(evtid, config_name):
    """For each truth particle in the event, count duplicated hits, i.e. how
    many of its hits land on a layer that already holds one of its hits.

    Returns a numpy array with one entry per particle (noise hits with
    particle_id <= 0 are excluded first).
    """
    config = load_yaml(config_name)
    event = Event(config['track_ml']['dir'], evtid)
    hits = event.filter_hits(config['doublets_from_cuts']['layers'])
    # Keep only hits attached to a real particle.
    hits = hits[hits.particle_id > 0]

    def n_duplicates(layer_ids):
        # total hits minus distinct layers = hits sharing a layer
        return layer_ids.size - np.unique(layer_ids).shape[0]

    return hits.groupby("particle_id")['layer'].apply(n_duplicates).values
def main():
    """Main function: parse arguments, initialize (optionally distributed)
    workers, train the model, and report timing statistics."""
    # Parse the command line
    args = parse_args()

    # Initialize MPI
    rank, n_ranks = init_workers(args.distributed)

    # Load configuration
    from heptrkx import load_yaml
    config = load_yaml(args.config)
    # Guard the missing-key case: the original called
    # os.path.expandvars(config.get('output_dir', None)), which raises
    # TypeError when 'output_dir' is absent despite the explicit default.
    output_dir = config.get('output_dir', None)
    if output_dir is not None:
        output_dir = os.path.expandvars(output_dir)
    if rank == 0:
        if output_dir is not None:
            os.makedirs(output_dir, exist_ok=True)
    else:
        # Only rank 0 writes outputs.
        output_dir = None

    # Setup logging
    config_logging(verbose=args.verbose, output_dir=output_dir)
    logging.info('Initialized rank %i out of %i', rank, n_ranks)
    if args.show_config and (rank == 0):
        # Lazy %-style args instead of eager '%' formatting.
        logging.info('Command line config: %s', args)
    if rank == 0:
        logging.info('Configuration: %s', config)
        logging.info('Saving job outputs to %s', output_dir)

    # Load the datasets
    train_data_loader, valid_data_loader = get_data_loaders(
        distributed=args.distributed, **config['data'])
    logging.info('Loaded %g training samples', len(train_data_loader.dataset))
    if valid_data_loader is not None:
        logging.info('Loaded %g validation samples',
                     len(valid_data_loader.dataset))

    # Load the trainer
    trainer = get_trainer(distributed=args.distributed,
                          output_dir=output_dir,
                          device=args.device,
                          **config['trainer'])
    # Build the model and optimizer
    trainer.build_model(**config.get('model', {}))
    if rank == 0:
        trainer.print_model_summary()

    # Run the training
    summary = trainer.train(train_data_loader=train_data_loader,
                            valid_data_loader=valid_data_loader,
                            **config['training'])
    if rank == 0:
        trainer.write_summaries()

    # Print some conclusions
    n_train_samples = len(train_data_loader.sampler)
    logging.info('Finished training')
    train_time = np.mean(summary['train_time'])
    logging.info('Train samples %g time %g s rate %g samples/s',
                 n_train_samples, train_time, n_train_samples / train_time)
    if valid_data_loader is not None:
        n_valid_samples = len(valid_data_loader.sampler)
        valid_time = np.mean(summary['valid_time'])
        logging.info('Valid samples %g time %g s rate %g samples/s',
                     n_valid_samples, valid_time, n_valid_samples / valid_time)

    # Drop to IPython interactive shell
    if args.interactive and (rank == 0):
        logging.info('Starting IPython interactive session')
        import IPython
        IPython.embed()

    if rank == 0:
        logging.info('All done!')
def create_evaluator(config_name, iteration, input_ckpt=None):
    """Build a TF-session-backed evaluator for the segment-classification GNN.

    Args:
        config_name: path to the YAML configuration for train_nx_graph.
        iteration: checkpoint iteration to restore; a negative value restores
            the latest checkpoint in the directory.
        input_ckpt: checkpoint directory; defaults to
            ``<output_dir>/<prod_name>`` from the configuration.

    Returns:
        A callable ``evaluator(input_graphs, target_graphs, use_digraph,
        bidirection)`` that runs inference and converts the final
        message-passing output into networkx graphs.
    """
    # load configuration file
    all_config = load_yaml(config_name)
    config = all_config['segment_training']
    config_tr = config['parameters']
    batch_size = n_graphs = config_tr['batch_size']  # need optimization
    num_processing_steps_tr = config_tr['n_iters']   # level of message-passing
    prod_name = config['prod_name']
    if input_ckpt is None:
        input_ckpt = os.path.join(config['output_dir'], prod_name)

    # generate inputs
    generate_input_target = inputs_generator(
        all_config['make_graph']['out_graph'], n_train_fraction=0.8)

    # build TF graph
    tf.compat.v1.reset_default_graph()
    model = get_model(config['model_name'])
    input_graphs, target_graphs = generate_input_target(n_graphs)
    input_ph = utils_tf.placeholders_from_data_dicts(
        input_graphs, force_dynamic_num_graphs=True)
    target_ph = utils_tf.placeholders_from_data_dicts(
        target_graphs, force_dynamic_num_graphs=True)
    output_ops_tr = model(input_ph, num_processing_steps_tr)

    # Close a session left over from a previous call, if any (UnboundLocalError
    # is a subclass of NameError, so a fresh call falls through harmlessly).
    try:
        sess.close()
    except NameError:
        pass
    # Use the v1-compat API consistently: the graph was reset via
    # tf.compat.v1 above, and plain tf.Session / tf.train.Saver do not
    # exist when the file runs under TF2.
    sess = tf.compat.v1.Session()
    saver = tf.compat.v1.train.Saver()
    if iteration < 0:
        saver.restore(sess, tf.train.latest_checkpoint(input_ckpt))
    else:
        # NOTE(review): ckpt_name is assumed to be a module-level checkpoint
        # filename format string — confirm it is defined elsewhere in the file.
        saver.restore(sess, os.path.join(input_ckpt, ckpt_name.format(iteration)))

    def evaluator(input_graphs, target_graphs, use_digraph=False, bidirection=False):
        """Run inference; input is graph tuples, sizes should match batch_size."""
        feed_dict = {input_ph: input_graphs, target_ph: target_graphs}
        predictions = sess.run(
            {"outputs": output_ops_tr, "target": target_ph},
            feed_dict=feed_dict)
        # Use only the output of the final message-passing step.
        output = predictions['outputs'][-1]
        return utils_data.predicted_graphs_to_nxs(
            output, input_graphs, target_graphs,
            use_digraph=use_digraph, bidirection=bidirection)

    return evaluator
'--ckpt', default='trained_results/nxgraph_big_test_NOINT/bak', help='path that stores checkpoint') add_arg( '--trkml', default='/global/cscratch1/sd/xju/heptrkx/trackml_inputs/train_all', help='original tracking data') args = parser.parse_args() config_file = args.train_config input_ckpt = args.ckpt iteration = args.nEpoch # create the model model = create_evaluator(config_file, iteration, input_ckpt) config = load_yaml(config_file) # prepare for data file_dir = config['data']['output_nxgraph_dir'] base_dir = os.path.join(file_dir, "event00000{}_g{:09d}_INPUT.npz") evtid = args.evt isec = args.sec file_names = [] section_list = [] if isec < 0: section_patten = base_dir.format(evtid, 0).replace('_g{:09}'.format(0), '*') n_sections = int(len(glob.glob(section_patten))) file_names = [base_dir.format(evtid, ii) for ii in range(n_sections)] section_list = [ii for ii in range(n_sections)]