Example #1
def prepare(cfg):
    torch.cuda.set_device(cfg['training']['gpus'][0])
    handlers = Handlers()

    # IO configuration
    # Temporarily set the I/O batch size to the training minibatch size; restored below
    batch_size = cfg['iotool']['batch_size']
    cfg['iotool']['batch_size'] = cfg['training']['minibatch_size']
    handlers.data_io, cfg['data_keys'] = loader_factory(cfg)
    # TODO check that it does what we want (cycle through dataloader)
    # check on a small sample, check 1/ it cycles through and 2/ randomness
    if cfg['training']['train']:
        handlers.data_io_iter = iter(cycle(handlers.data_io))
    else:
        handlers.data_io_iter = itertools.cycle(handlers.data_io)
    cfg['iotool']['batch_size'] = batch_size

    # Trainer configuration
    handlers.trainer = trainval(cfg)

    # Restore weights if necessary
    loaded_iteration = handlers.trainer.initialize()
    if cfg['training']['train']:
        handlers.iteration = loaded_iteration

    make_directories(cfg, loaded_iteration, handlers=handlers)
    return handlers
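A minimal sketch of how this prepare() variant might be driven, assuming a YAML file whose 'training' and 'iotool' blocks contain the keys accessed above (the file name is only a placeholder):

import yaml

# Load a hypothetical configuration with 'training' and 'iotool' sections.
with open('train_config.cfg', 'r') as f:
    cfg = yaml.load(f, Loader=yaml.Loader)

handlers = prepare(cfg)
# prepare() leaves a cycling iterator on handlers.data_io_iter.
data = next(handlers.data_io_iter)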
Example #2
def test_loader(cfg_file, quiet=True, csv=False):
    """
    Tests the loading of data using parse_sparse3d and parse_sparse3d_scn.
    """
    TOP_DIR = os.path.dirname(os.path.abspath(__file__))
    TOP_DIR = os.path.dirname(TOP_DIR)
    sys.path.insert(0, TOP_DIR)
    # import
    import numpy as np
    from mlreco.iotools.factories import loader_factory
    # find config file
    if not os.path.isfile(cfg_file):
        cfg_file = os.path.join(TOP_DIR, 'config', cfg_file)
    if not os.path.isfile(cfg_file):
        print(cfg_file, 'not found...')
        sys.exit(1)

    if csv:
        from mlreco.utils.utils import CSVData
        csv = CSVData('csv.txt')
    # maximum number of batches to iterate over
    MAX_BATCH_ID = 20

    # configure
    cfg = yaml.load(open(cfg_file, 'r'), Loader=yaml.Loader)
    loader, data_keys = loader_factory(cfg)
    if not quiet: print(len(loader), 'batches loaded')
    if not quiet: print('keys:', data_keys)

    # Loop
    tstart = time.time()
    tsum = 0.
    t0 = 0.
    for batch_id, data in enumerate(loader):
        titer = time.time() - tstart
        if not quiet:
            print('Batch', batch_id)
            for data_id in range(len(data_keys)):
                key = data_keys[data_id]
                print('   ', key, np.shape(data[data_id]))
            print(data[-1])
            print('Duration', titer, '[s]')
        if batch_id < 1:
            t0 = titer
        tsum += (titer)
        if csv:
            csv.record(['iter', 't'], [batch_id, titer])
            csv.write()
        if (batch_id + 1) == MAX_BATCH_ID:
            break
        tstart = time.time()
    if not quiet:
        print('Total time:', tsum, '[s] ... Average time:',
              tsum / MAX_BATCH_ID, '[s]')
        if MAX_BATCH_ID > 1:
            print('First iter:', t0, '[s] ... Average w/o first iter:',
                  (tsum - t0) / (MAX_BATCH_ID - 1), '[s]')
    if csv: csv.close()
    return True
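A possible call to test_loader, assuming the configuration name resolves either as given or under the repository's config/ directory (the file name below is a placeholder):

# Print per-batch shapes and timings for up to 20 batches and log them to csv.txt.
ok = test_loader('test_loader.cfg', quiet=False, csv=True)
assert ok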
Example #3
def main():
    # import
    import numpy as np
    from mlreco.iotools.factories import loader_factory
    # find config file
    cfg_file = sys.argv[1]
    if not os.path.isfile(cfg_file):
        cfg_file = os.path.join(TOP_DIR, 'config', sys.argv[1])
    if not os.path.isfile(cfg_file):
        print(sys.argv[1],'not found...')
        sys.exit(1)

    # check if quiet mode
    quiet = 'quiet' in sys.argv
    # check if csv should be made
    csv   = 'csv' in sys.argv
    if csv:
        from mlreco.utils.utils import CSVData
        csv=CSVData('csv.txt')
    # check if batch is specified (1st integer value in sys.argv)
    MAX_BATCH_ID=20
    for argv in sys.argv:
        if not argv.isdigit(): continue
        MAX_BATCH_ID=int(argv)
        break
    
    # configure
    cfg = yaml.load(open(cfg_file,'r'),Loader=yaml.Loader)
    loader,data_keys = loader_factory(cfg)
    if not quiet: print(len(loader),'batches loaded')
    if not quiet: print('keys:',data_keys)
    
    # Loop
    tstart=time.time()
    tsum=0.
    t0=0.
    for batch_id,data in enumerate(loader):
        titer=time.time() - tstart
        if not quiet:
            print('Batch',batch_id)
            for data_id in range(len(data_keys)):
                key = data_keys[data_id]
                print('   ',key,np.shape(data[data_id]))
            print(data[-1])
            print('Duration',titer,'[s]')
        if batch_id < 1:
            t0 = titer
        tsum += (titer)
        if csv:
            csv.record(['iter','t'],[batch_id,titer])
            csv.write()
        if (batch_id+1) == MAX_BATCH_ID:
            break
        tstart=time.time()
    if not quiet:
        print('Total time:',tsum,'[s] ... Average time:',tsum/MAX_BATCH_ID,'[s]')
        if MAX_BATCH_ID>1:
            print('First iter:',t0,'[s] ... Average w/o first iter:',(tsum - t0)/(MAX_BATCH_ID-1),'[s]')
    if csv: csv.close()
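main() takes everything from sys.argv: the first argument that names an existing file (directly or under config/) is the configuration, any bare integer overrides MAX_BATCH_ID, and the tokens 'quiet' and 'csv' toggle those modes. A sketch of driving it programmatically, with a placeholder config name:

import sys

# Equivalent to running: python <script> my_loader.cfg 50 csv
sys.argv = ['loader_test', 'my_loader.cfg', '50', 'csv']
main()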
Example #4
def apply_event_filter(handlers, event_list=None):
    """
    Reconfigures IO to apply an event filter.
    INPUT:
      - handlers is a Handlers instance generated by the prepare() function
      - event_list is an array of integer event indices to keep
    """

    # Instantiate DataLoader
    handlers.data_io = loader_factory(handlers.cfg, event_list)

    # IO iterator
    handlers.data_io_iter = itertools.cycle(handlers.data_io)

    if 'trainval' in handlers.cfg and handlers.cfg['trainval']['train']:
        handlers.data_io_iter = iter(cycle(handlers.data_io))
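apply_event_filter() simply rebuilds the DataLoader with an explicit event list and refreshes the cycling iterator. A short sketch, assuming handlers were produced by prepare() and the indices are valid for the dataset:

# Restrict I/O to the first three events, e.g. for a quick check of a single file.
apply_event_filter(handlers, event_list=[0, 1, 2])
data = next(handlers.data_io_iter)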
Example #5
def prepare(cfg):
    """
    Prepares high-level API handlers, namely a trainval instance and a torch DataLoader (with iterator)
    INPUT
      - cfg is a full configuration block after being pre-processed by the process_config function
    OUTPUT
      - Handlers instance with DataLoader (and, if configured, trainval) instances attached
    """
    handlers = Handlers()
    handlers.cfg = cfg

    # Instantiate DataLoader
    handlers.data_io = loader_factory(cfg)

    # IO iterator
    handlers.data_io_iter = itertools.cycle(handlers.data_io)

    if 'trainval' in cfg:
        # Set random seed for reproducibility
        np.random.seed(cfg['trainval']['seed'])
        torch.manual_seed(cfg['trainval']['seed'])

        # Set primary device
        if len(cfg['trainval']['gpus']) > 0:
            torch.cuda.set_device(cfg['trainval']['gpus'][0])


        # TODO check that it does what we want (cycle through dataloader)
        # check on a small sample, check 1/ it cycles through and 2/ randomness
        if cfg['trainval']['train']:
            handlers.data_io_iter = iter(cycle(handlers.data_io))

        # Trainer configuration
        handlers.trainer = trainval(cfg)

        # set the shared clock
        handlers.watch = handlers.trainer._watch

        # Restore weights if necessary
        loaded_iteration = handlers.trainer.initialize()
        if cfg['trainval']['train']:
            handlers.iteration = loaded_iteration

        make_directories(cfg, loaded_iteration, handlers=handlers)

    return handlers
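Once this prepare() has run, the returned handlers carry the DataLoader iterator and, if a 'trainval' block is configured, the trainer. A minimal sketch of consuming them (the iteration count is illustrative):

handlers = prepare(cfg)

# Pull minibatches from the (cycling) iterator; handlers.trainer is only
# present when the configuration contains a 'trainval' block.
for _ in range(10):
    data = next(handlers.data_io_iter)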
Example #6
      learning_rate: 0.0025
      gpus: '0'
      weight_prefix: '/gpfs/slac/staas/fs1/g/neutrino/qatom/gnn-models/early_stop_pi0-20.ckpt'
      iterations: 1000
      report_step: 1
      checkpoint_step: 100
      log_dir: logs/edge_gnn/edge_node_only
      model_path: ''
      train: True
      debug: False
      minibatch_size: 1
    '''

cfg = yaml.load(cfg, Loader=yaml.Loader)
process_config(cfg)
loader, cfg['data_keys'] = loader_factory(cfg)

import torch
import pickle
from train_voxel_gnn import GraphDataset

from os import listdir
from torch.utils.data import DataLoader
from mlreco.main_funcs import cycle

data_path = '/gpfs/slac/staas/fs1/g/neutrino/qatom/gnn_pi0_reco_nocompton/'
valid_data = GraphDataset(data_path, 1, data_key='00000')
valid_dataset = cycle(valid_data)

from mlreco.trainval import trainval
from mlreco.main_funcs import get_data_minibatched
    _, _, labels = generate_truth(d, positions, groups, edges)
    contents = (edges, nf, ef, labels)
    pickle.dump(contents, open(fname, 'wb+'))
    print('saved', fname)
    return fname


files = sorted(listdir(cfg['iotool']['dataset']['data_dirs'][0]))
orig_key = cfg['iotool']['dataset']['data_key']
worker_inputs = []
for f in range(len(files)):
    if orig_key not in files[f]:
        continue

    cfg['iotool']['dataset']['data_key'] = files[f]
    loader, _ = loader_factory(cfg)
    dataset = iter(cycle(loader))
    names = []
    while True:
        d = next(dataset, None)
        name = d['index'][0][0]
        if name in names:
            break
        names.append(name)
        if name < start_num:
            continue
        fname = OUTPUT_DATASET + str(f * 1000 + name).zfill(10) + '.pkl'
        if BLUR_DATASET:
            d = process(d)
        try:
            worker(d, fname)
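The loop above wraps each per-file DataLoader in cycle() and detects one full pass by watching for the first repeated event index. A self-contained sketch of that wrap-around check, using a dummy list of event dictionaries in place of the real loader:

import itertools

# Stand-ins for loader batches, each carrying an 'index' entry like d['index'][0][0] above.
events = [{'index': [[i]]} for i in range(5)]
dataset = iter(itertools.cycle(events))

names = []
while True:
    d = next(dataset)
    name = d['index'][0][0]
    if name in names:  # the cycle has wrapped: every event was visited once
        break
    names.append(name)
print('processed', len(names), 'events')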
Example #8
e4 = []
e5 = []
e6 = []
e7 = []

l3 = np.zeros(0)
l4 = np.zeros(0)
l5 = np.zeros(0)
l6 = np.zeros(0)
l7 = np.zeros(0)  # backward gamma separation
l8 = np.empty((0, 3))  # fitted gamma directions
l9 = np.zeros(0)  # gamma pca values
l10 = np.zeros(0)  # gamma nhits in pca
for f in range(len(files)):
    cfg['iotool']['dataset']['data_key'] = files[f]
    loader, data_keys = factories.loader_factory(cfg)
    it = iter(loader)
    preader = particle_reader(cfg['iotool']['dataset']['data_dirs'][0] + '/' +
                              files[f])
    for i in range(len(loader.dataset)):
        #         print(i/len(loader.dataset))
        pinfo = preader.get_event(i)
        out = worker(next(it), f)
        if out is not None and len(out[0][0][0]) > 0:
            all_energies, pi0_cos, particles, gamma_sep, gamma_dir, gamma_pca, gamma_pca_nhits = out

            energies = pinfo['creation_energy'][particles]
            true_x = pinfo['direction_x'][particles]
            true_y = pinfo['direction_y'][particles]
            true_z = pinfo['direction_z'][particles]
            true_dirs = np.array([true_x.T, true_y.T, true_z.T]).T
import pickle

start_num = 0

files = sorted(listdir(cfg['iotool']['dataset']['data_dirs'][0]))
orig_key = cfg['iotool']['dataset']['data_key']
worker_inputs = []
for f in range(len(files)):
    if orig_key not in files[f]:
        continue
#     if int(files[f][-10:-5]) < start_num:
#         continue

    cfg['iotool']['dataset']['data_key'] = files[f]
    loader, _ = factories.loader_factory(cfg)
    dataset = iter(loader)
    names = []
    index = 0
    num = 0
    while True:
        d = next(dataset, None)
        if d is None:
            break
        num += 1
        # skip events that come before start_num
        if num <= start_num:
            continue

        n = d['index'][0][0]
        print(n)
        if n in names: