def prepare(cfg):
    torch.cuda.set_device(cfg['training']['gpus'][0])
    handlers = Handlers()

    # IO configuration
    # Batch size for I/O becomes minibatch size
    batch_size = cfg['iotool']['batch_size']
    cfg['iotool']['batch_size'] = cfg['training']['minibatch_size']
    handlers.data_io, cfg['data_keys'] = loader_factory(cfg)

    # TODO check that it does what we want (cycle through dataloader)
    # check on a small sample, check 1/ it cycles through and 2/ randomness
    if cfg['training']['train']:
        handlers.data_io_iter = iter(cycle(handlers.data_io))
    else:
        handlers.data_io_iter = itertools.cycle(handlers.data_io)
    cfg['iotool']['batch_size'] = batch_size

    # Trainer configuration
    handlers.trainer = trainval(cfg)

    # Restore weights if necessary
    loaded_iteration = handlers.trainer.initialize()
    if cfg['training']['train']:
        handlers.iteration = loaded_iteration
    make_directories(cfg, loaded_iteration, handlers=handlers)
    return handlers
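# The TODO above hinges on a real difference between the two iterators:
# itertools.cycle caches the batches from the first pass over the DataLoader
# and replays them verbatim, so per-epoch reshuffling is lost; a hand-rolled
# cycle helper restarts the loader each epoch instead. A minimal sketch,
# assuming it matches the behavior of the cycle() imported here:
def cycle(data_io):
    while True:
        # Re-enter the DataLoader every epoch so a shuffling sampler
        # re-randomizes the batch order.
        for data in data_io:
            yield data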
def test_loader(cfg_file, quiet=True, csv=False):
    """
    Tests the loading of data using parse_sparse3d and parse_sparse3d_scn.
    """
    import os, sys, time, yaml
    TOP_DIR = os.path.dirname(os.path.abspath(__file__))
    TOP_DIR = os.path.dirname(TOP_DIR)
    sys.path.insert(0, TOP_DIR)

    # import
    import numpy as np
    from mlreco.iotools.factories import loader_factory

    # find config file
    if not os.path.isfile(cfg_file):
        cfg_file = os.path.join(TOP_DIR, 'config', cfg_file)
    if not os.path.isfile(cfg_file):
        print(cfg_file, 'not found...')
        sys.exit(1)

    if csv:
        from mlreco.utils.utils import CSVData
        csv = CSVData('csv.txt')

    # maximum number of batches to read
    MAX_BATCH_ID = 20

    # configure
    cfg = yaml.load(open(cfg_file, 'r'), Loader=yaml.Loader)
    loader, data_keys = loader_factory(cfg)
    if not quiet:
        print(len(loader), 'batches loaded')
        print('keys:', data_keys)

    # Loop
    tstart = time.time()
    tsum = 0.
    t0 = 0.
    for batch_id, data in enumerate(loader):
        titer = time.time() - tstart
        if not quiet:
            print('Batch', batch_id)
            for data_id in range(len(data_keys)):
                key = data_keys[data_id]
                print('  ', key, np.shape(data[data_id]))
            print(data[-1])
            print('Duration', titer, '[s]')
        if batch_id < 1:
            t0 = titer
        tsum += titer
        if csv:
            csv.record(['iter', 't'], [batch_id, titer])
            csv.write()
        if (batch_id + 1) == MAX_BATCH_ID:
            break
        tstart = time.time()
    if not quiet:
        print('Total time:', tsum, '[s] ... Average time:', tsum / MAX_BATCH_ID, '[s]')
        if MAX_BATCH_ID > 1:
            print('First iter:', t0, '[s] ... Average w/o first iter:',
                  (tsum - t0) / (MAX_BATCH_ID - 1), '[s]')
    if csv:
        csv.close()
    return True
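# A quick invocation sketch for the function above; 'test_uresnet.cfg' is a
# hypothetical config name, resolved against <repo>/config/ when not found
# as given:
test_loader('test_uresnet.cfg', quiet=False, csv=True)  # times up to 20 batches, logs timings to csv.txt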
def main():
    import os, sys, time, yaml

    # import
    import numpy as np
    from mlreco.iotools.factories import loader_factory

    # find config file (TOP_DIR = repository root, one level above this script)
    TOP_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cfg_file = sys.argv[1]
    if not os.path.isfile(cfg_file):
        cfg_file = os.path.join(TOP_DIR, 'config', sys.argv[1])
    if not os.path.isfile(cfg_file):
        print(sys.argv[1], 'not found...')
        sys.exit(1)

    # check if quiet mode
    quiet = 'quiet' in sys.argv

    # check if csv should be made
    csv = 'csv' in sys.argv
    if csv:
        from mlreco.utils.utils import CSVData
        csv = CSVData('csv.txt')

    # check if batch count is specified (1st integer value in sys.argv)
    MAX_BATCH_ID = 20
    for argv in sys.argv:
        if not argv.isdigit():
            continue
        MAX_BATCH_ID = int(argv)
        break

    # configure
    cfg = yaml.load(open(cfg_file, 'r'), Loader=yaml.Loader)
    loader, data_keys = loader_factory(cfg)
    if not quiet:
        print(len(loader), 'batches loaded')
        print('keys:', data_keys)

    # Loop
    tstart = time.time()
    tsum = 0.
    t0 = 0.
    for batch_id, data in enumerate(loader):
        titer = time.time() - tstart
        if not quiet:
            print('Batch', batch_id)
            for data_id in range(len(data_keys)):
                key = data_keys[data_id]
                print('  ', key, np.shape(data[data_id]))
            print(data[-1])
            print('Duration', titer, '[s]')
        if batch_id < 1:
            t0 = titer
        tsum += titer
        if csv:
            csv.record(['iter', 't'], [batch_id, titer])
            csv.write()
        if (batch_id + 1) == MAX_BATCH_ID:
            break
        tstart = time.time()
    if not quiet:
        print('Total time:', tsum, '[s] ... Average time:', tsum / MAX_BATCH_ID, '[s]')
        if MAX_BATCH_ID > 1:
            print('First iter:', t0, '[s] ... Average w/o first iter:',
                  (tsum - t0) / (MAX_BATCH_ID - 1), '[s]')
    if csv:
        csv.close()
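# main() is driven entirely by sys.argv; a hypothetical command line matching
# the parsing above (the script name is assumed):
#
#   python bin/test_loader.py config/test.cfg 50 quiet csv
#
# reads 50 batches (the first integer argument), suppresses per-batch prints
# ('quiet'), and writes per-iteration timings to csv.txt ('csv').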
def apply_event_filter(handlers, event_list=None):
    """
    Reconfigures IO to apply an event filter
    INPUT:
      - handlers is a Handlers instance generated by the prepare() function
      - event_list is an array of integers
    """
    # Instantiate DataLoader
    handlers.data_io = loader_factory(handlers.cfg, event_list)

    # IO iterator
    handlers.data_io_iter = itertools.cycle(handlers.data_io)
    if 'trainval' in handlers.cfg and handlers.cfg['trainval']['train']:
        handlers.data_io_iter = iter(cycle(handlers.data_io))
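# Usage sketch: rebuild the loader so it serves only a handful of entries.
# The event indices are hypothetical; handlers.cfg must have been attached
# by prepare() beforehand.
handlers = prepare(cfg)
apply_event_filter(handlers, event_list=[0, 5, 42])
data = next(handlers.data_io_iter)  # batches now drawn only from the listed entries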
def prepare(cfg):
    """
    Prepares high level API handlers, namely a trainval instance
    and a torch DataLoader (w/ iterator)
    INPUT:
      - cfg is a full configuration block, pre-processed by the
        process_config function
    OUTPUT:
      - Handlers instance with trainval/DataLoader instances attached
        (if present in the config)
    """
    handlers = Handlers()
    handlers.cfg = cfg

    # Instantiate DataLoader
    handlers.data_io = loader_factory(cfg)

    # IO iterator
    handlers.data_io_iter = itertools.cycle(handlers.data_io)

    if 'trainval' in cfg:
        # Set random seed for reproducibility
        np.random.seed(cfg['trainval']['seed'])
        torch.manual_seed(cfg['trainval']['seed'])

        # Set primary device
        if len(cfg['trainval']['gpus']) > 0:
            torch.cuda.set_device(cfg['trainval']['gpus'][0])

        # TODO check that it does what we want (cycle through dataloader)
        # check on a small sample, check 1/ it cycles through and 2/ randomness
        if cfg['trainval']['train']:
            handlers.data_io_iter = iter(cycle(handlers.data_io))

        # Trainer configuration
        handlers.trainer = trainval(cfg)

        # Set the shared clock
        handlers.watch = handlers.trainer._watch

        # Restore weights if necessary
        loaded_iteration = handlers.trainer.initialize()
        if cfg['trainval']['train']:
            handlers.iteration = loaded_iteration
        make_directories(cfg, loaded_iteration, handlers=handlers)
    return handlers
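# Minimal end-to-end sketch for prepare(); the config path is hypothetical
# and process_config is the pre-processing step referenced in the docstring.
import yaml
from mlreco.main_funcs import process_config

cfg = yaml.load(open('config/train_uresnet.cfg', 'r'), Loader=yaml.Loader)  # hypothetical path
process_config(cfg)                  # normalize the configuration block
handlers = prepare(cfg)              # trainval instance + cycling DataLoader iterator
data = next(handlers.data_io_iter)   # draw one minibatch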
learning_rate: 0.0025
gpus: '0'
weight_prefix: '/gpfs/slac/staas/fs1/g/neutrino/qatom/gnn-models/early_stop_pi0-20.ckpt'
iterations: 1000
report_step: 1
checkpoint_step: 100
log_dir: logs/edge_gnn/edge_node_only
model_path: ''
train: True
debug: False
minibatch_size: 1
'''
cfg = yaml.load(cfg, Loader=yaml.Loader)
process_config(cfg)
loader, cfg['data_keys'] = loader_factory(cfg)

import torch
import pickle
from train_voxel_gnn import GraphDataset
from os import listdir
from torch.utils.data import DataLoader
from mlreco.main_funcs import cycle

data_path = '/gpfs/slac/staas/fs1/g/neutrino/qatom/gnn_pi0_reco_nocompton/'
valid_data = GraphDataset(data_path, 1, data_key='00000')
valid_dataset = cycle(valid_data)

from mlreco.trainval import trainval
from mlreco.main_funcs import get_data_minibatched
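# valid_dataset wraps the GraphDataset in the same cycle() generator used for
# training I/O, so a single validation graph can be drawn with a plain next():
graph = next(valid_dataset)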
    _, _, labels = generate_truth(d, positions, groups, edges)
    contents = (edges, nf, ef, labels)
    pickle.dump(contents, open(fname, 'wb+'))
    print('saved', fname)
    return fname

files = sorted(listdir(cfg['iotool']['dataset']['data_dirs'][0]))
orig_key = cfg['iotool']['dataset']['data_key']
worker_inputs = []
for f in range(len(files)):
    if orig_key not in files[f]:
        continue
    cfg['iotool']['dataset']['data_key'] = files[f]
    loader, _ = loader_factory(cfg)
    dataset = iter(cycle(loader))
    names = []
    while True:
        d = next(dataset, None)
        name = d['index'][0][0]
        if name in names:
            break
        names.append(name)
        if name < start_num:
            continue
        fname = OUTPUT_DATASET + str(f * 1000 + name).zfill(10) + '.pkl'
        if BLUR_DATASET:
            d = process(d)
        try:
            worker(d, fname)
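# For reference, a sketch of reading one pickled tuple back, mirroring the
# `contents = (edges, nf, ef, labels)` layout written by the worker above:
import pickle
with open(fname, 'rb') as fin:
    edges, nf, ef, labels = pickle.load(fin)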
e4 = []
e5 = []
e6 = []
e7 = []
l3 = np.zeros(0)
l4 = np.zeros(0)
l5 = np.zeros(0)
l6 = np.zeros(0)
l7 = np.zeros(0)       # backward gamma separation
l8 = np.empty((0, 3))  # fitted gamma directions
l9 = np.zeros(0)       # gamma pca values
l10 = np.zeros(0)      # gamma nhits in pca

for f in range(len(files)):
    cfg['iotool']['dataset']['data_key'] = files[f]
    loader, data_keys = factories.loader_factory(cfg)
    it = iter(loader)
    preader = particle_reader(cfg['iotool']['dataset']['data_dirs'][0] + '/' + files[f])
    for i in range(len(loader.dataset)):
        # print(i/len(loader.dataset))
        pinfo = preader.get_event(i)
        out = worker(next(it), f)
        if out is not None and len(out[0][0][0]) > 0:
            all_energies, pi0_cos, particles, gamma_sep, gamma_dir, gamma_pca, gamma_pca_nhits = out
            energies = pinfo['creation_energy'][particles]
            true_x = pinfo['direction_x'][particles]
            true_y = pinfo['direction_y'][particles]
            true_z = pinfo['direction_z'][particles]
            true_dirs = np.array([true_x.T, true_y.T, true_z.T]).T
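# The zero-length arrays above are presumably grown event by event; a sketch
# of the accumulation pattern they imply (1-D arrays via np.concatenate, the
# (N, 3) direction array via np.vstack):
l9 = np.concatenate([l9, gamma_pca])  # append per-event PCA values
l8 = np.vstack([l8, gamma_dir])       # stack fitted (x, y, z) directions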
import pickle

start_num = 0
files = sorted(listdir(cfg['iotool']['dataset']['data_dirs'][0]))
orig_key = cfg['iotool']['dataset']['data_key']
worker_inputs = []
for f in range(len(files)):
    if orig_key not in files[f]:
        continue
    # if int(files[f][-10:-5]) < start_num:
    #     continue
    cfg['iotool']['dataset']['data_key'] = files[f]
    loader, _ = factories.loader_factory(cfg)
    dataset = iter(loader)
    names = []
    index = 0
    num = 0
    while True:
        d = next(dataset, None)
        if d is None:
            break
        # Skip events until start_num is reached; increment the counter first
        # so the loop cannot spin forever when start_num > 0.
        num += 1
        if num <= start_num:
            continue
        n = d['index'][0][0]
        print(n)
        if n in names: