def prepare_manager(self, mode):
    """Configure, start and prime the larcv threaded IO manager for ``mode``.

    The ``FILLER``, ``VERBOSITY`` and ``FILE`` entries of
    ``self._config['IO'][mode]`` are passed to ``larcv_threadio.configure``.
    The manager is started with ``self._config['MINIBATCH_SIZE']``, stored
    in ``self._dataloaders[mode]`` and asked for its first batch. The
    elapsed wall-clock time is written to stdout.

    Args:
        mode: Key into ``self._config['IO']`` selecting the IO section.

    Raises:
        Exception: If ``mode`` has no entry in ``self._config['IO']``.
    """
    if mode not in self._config['IO']:
        raise Exception(
            "Missing IO config mode {} but trying to prepare manager.".format(
                mode))

    start = time.time()
    mode_cfg = self._config['IO'][mode]

    io = larcv_threadio()
    io.configure({
        'filler_name': mode_cfg['FILLER'],
        'verbosity': mode_cfg['VERBOSITY'],
        'filler_cfg': mode_cfg['FILE'],
    })
    io.start_manager(self._config['MINIBATCH_SIZE'])
    self._dataloaders.update({mode: io})

    # Entries and event ids are only stored when not training.
    keep_ids = not self._config['TRAINING']
    self._dataloaders[mode].next(store_entries=keep_ids,
                                 store_event_ids=keep_ids)

    elapsed = time.time() - start
    # '.2f' (fixed point): a bare '.2' means two significant digits and
    # switches to scientific notation for start-up times of 10 s or more.
    sys.stdout.write("Time to start {0} IO: {1:.2f}s\n".format(mode, elapsed))
def __init__(self, cfg, ioname="ThreadProcessor", filelist=""):
    """Render a filler configuration from a template and start threaded IO.

    A template ``.cfg`` file located next to this module is selected from
    the network settings in ``cfg``, filled in with ``ioname``,
    ``cfg.SEED`` and ``filelist``, and written to a temporary file whose
    path is handed to ``larcv_threadio``. The manager is then started with
    ``cfg.BATCH_SIZE`` and the first batch is requested.

    Args:
        cfg: Configuration object. This constructor reads ``IMAGE_SIZE``,
            ``DATA_3D``, ``SEED``, ``NET``, ``BASE_NET``,
            ``URESNET_WEIGHTING``, ``NEXT_INDEX`` and ``BATCH_SIZE``.
        ioname: Name substituted into the template; the filler is
            registered as ``"<ioname>IO"``.
        filelist: Value substituted for the third placeholder of the
            template.
    """
    self.N = cfg.IMAGE_SIZE  # shape of canvas
    self.cfg = cfg
    self.dim = 3 if cfg.DATA_3D else 2
    np.random.seed(cfg.SEED)
    self.train_uresnet = (cfg.NET == 'base' and cfg.BASE_NET == 'uresnet')

    # The 2D and 3D templates differ only by the dimension in the file
    # name. `replace` is the number of extra `ioname` placeholders the
    # chosen template contains after the first three.
    if self.train_uresnet:
        if cfg.URESNET_WEIGHTING:
            replace, name_pattern = 6, 'uresnet_%dd_weight.cfg'
        else:
            replace, name_pattern = 4, 'uresnet_%dd.cfg'
    elif cfg.NET == 'full':
        replace, name_pattern = 8, 'ppn_uresnet_%dd.cfg'
    else:
        replace, name_pattern = 6, 'ppn_%dd.cfg'
    config_file = name_pattern % self.dim

    template_path = os.path.join(os.path.dirname(__file__), config_file)
    # Context manager so the template handle is closed deterministically.
    with open(template_path) as template:
        io_config = template.read() % (
            (ioname, cfg.SEED, filelist) + (ioname,) * replace
        )
    self.ioname = ioname

    # NOTE(review): the temporary file is only referenced by this local, so
    # it is removed once the constructor returns — confirm larcv does not
    # re-read it later.
    filler_config = tempfile.NamedTemporaryFile('w')
    filler_config.write(io_config)
    filler_config.flush()

    dataloader_cfg = {
        "filler_name": "%sIO" % ioname,
        # Plain integer: the original trailing comma stored the one-element
        # tuple (0,), which is truthy rather than "verbosity off".
        "verbosity": 0,
        "filler_cfg": filler_config.name,
        # NOTE(review): the original comment said an explicit numpy copy is
        # made, but the flag is False — confirm the intended value.
        "make_copy": False,
    }

    self.proc = larcv_threadio()
    self.proc.configure(dataloader_cfg)
    self.proc.set_next_index(cfg.NEXT_INDEX)
    self.proc.start_manager(self.cfg.BATCH_SIZE)
    self.proc.next()
def __init__(self, cfg, fillername, verbosity=0, loadallinmem=False,
             randomize_inmem_data=True, store_eventids=False,
             max_inmem_events=-1, batchsize=None):
    """Configure a ``larcv_threadio`` reader from a filler configuration file.

    The name of the configuration parameter set is taken from the first
    line of the file (the text before the first ``:``). The ``ProcessName``
    and ``ProcessType`` string vectors of that parameter set are copied
    into ``self.datalist`` and ``self.dtypelist``.

    Args:
        cfg: Path to the filler configuration file.
        fillername: Passed to ``larcv_threadio`` as ``filler_name``.
        verbosity: Passed to ``larcv_threadio`` as ``verbosity``.
        loadallinmem: If true, ``self._loadinmem()`` is called after
            configuration.
        randomize_inmem_data: Stored on the instance; not used here.
        store_eventids: Stored on the instance; not used here.
        max_inmem_events: Stored on the instance; not used here.
        batchsize: If not ``None``, ``self.start(batchsize)`` is called at
            the end of construction.

    Raises:
        ValueError: If ``cfg`` does not exist or is an empty file.
    """
    # we hide this here so that we can use this package for both larcv and
    # larcv2
    larcv.PSet  # touch this to force libBase to load, which has CreatePSetFromFile
    from larcv.dataloader2 import larcv_threadio

    self.verbosity = verbosity
    self.batchsize = batchsize
    self.randomize_inmem_data = randomize_inmem_data
    self.loadallinmem = loadallinmem
    self.max_inmem_events = max_inmem_events
    self.cfg = cfg
    self.store_eventids = store_eventids

    # we setup the larcv threadfiller class, which handles io from larcv
    # files: cfg dictionary needed for larcv_threadio
    self.filler_cfg = {
        "filler_name": fillername,
        "verbosity": self.verbosity,
        "filler_cfg": self.cfg,
    }
    if not os.path.exists(self.cfg):
        raise ValueError(
            "Could not find filler configuration file: %s" % (self.cfg))

    # we read the first line of the config file, which should have the name
    # of the config parameter set. Only that line is read, and the handle
    # is closed right away.
    with open(self.cfg, 'r') as cfg_file:
        first_line = cfg_file.readline()
    if not first_line:
        raise ValueError(
            "Filler configuration file is empty: %s" % (self.cfg))
    self.cfgname = first_line.split(":")[0].strip()

    # we load the pset ourselves, as we want access to values in the
    # 'ProcessName' list. will use these as the names of the data products
    # loaded. store in self.datalist
    self.pset = larcv.CreatePSetFromFile(
        self.cfg, self.cfgname).get("larcv::PSet")(self.cfgname)
    datastr_v = self.pset.get("std::vector<std::string>")("ProcessName")
    typestr_v = self.pset.get("std::vector<std::string>")("ProcessType")
    # Both vectors are indexed over the length of ProcessName, as in the
    # original loop.
    n_products = datastr_v.size()
    self.datalist = [datastr_v[i] for i in range(n_products)]
    self.dtypelist = [typestr_v[i] for i in range(n_products)]

    # finally, configure io
    print("CONFIGURE")
    self.io = larcv_threadio()
    self.io.configure(self.filler_cfg)

    if loadallinmem:
        self._loadinmem()

    if self.batchsize is not None:
        self.start(self.batchsize)
def initialize(self):
    """Set up the larcv IO streams and report the main input's dimensions.

    The main input stream is always created. A test input stream and an
    analysis output manager are created only when ``TEST_INPUT_CONFIG`` /
    ``ANA_OUTPUT_CONFIG`` are set on ``self._cfg``.

    Returns:
        The result of ``.dim()`` on the data fetched from the main stream
        under ``self._cfg.KEYWORD_DATA``, or ``None`` (after printing a
        message) when ``self._cfg.MAIN_INPUT_CONFIG`` is not set.
    """
    cfg = self._cfg

    # Nothing can be done without the main filler configuration.
    if not cfg.MAIN_INPUT_CONFIG:
        print('Must provide larcv data filler configuration file!')
        return

    #
    # Data IO configuration
    #
    # Main input stream
    self._input_main = larcv_threadio()
    main_stream = self._input_main
    main_stream.configure({
        'filler_name': 'ThreadProcessor',
        'verbosity': 0,
        'filler_cfg': cfg.MAIN_INPUT_CONFIG,
    })
    main_stream.start_manager(cfg.MINIBATCH_SIZE)

    # Test input stream (optional)
    if cfg.TEST_INPUT_CONFIG:
        self._input_test = larcv_threadio()
        test_stream = self._input_test
        test_stream.configure({
            'filler_name': 'TestIO',
            'verbosity': 0,
            'filler_cfg': cfg.TEST_INPUT_CONFIG,
        })
        test_stream.start_manager(cfg.TEST_BATCH_SIZE)

    # Output stream (optional)
    if cfg.ANA_OUTPUT_CONFIG:
        self._output = larcv.IOManager(cfg.ANA_OUTPUT_CONFIG)
        self._output.initialize()

    # Retrieve image/label dimensions from the first main-stream batch.
    main_stream.next(store_entries=(not cfg.TRAIN),
                     store_event_ids=(not cfg.TRAIN))
    main_data = main_stream.fetch_data(cfg.KEYWORD_DATA)
    return main_data.dim()
def IOPrep(name, b):
    """Create, configure and start a ``larcv_threadio`` for a data set.

    Args:
        name: ``'train'`` selects ``Train_cfg``, ``'test'`` selects
            ``Test_cfg``.
        b: Value passed to ``start_manager``.

    Returns:
        The started ``larcv_threadio`` instance, after its first ``next()``
        call.

    Raises:
        ValueError: If ``name`` is neither ``'train'`` nor ``'test'``.
    """
    if name == 'train':
        cfg = Train_cfg
    elif name == 'test':
        cfg = Test_cfg
    else:
        # Fail here with a clear message; previously the message was only
        # printed and the function crashed later on the unbound `cfg`.
        raise ValueError('Bad name, check CFG: %r' % (name,))

    proc = larcv_threadio()
    proc.configure(cfg)
    proc.start_manager(b)
    # Need sleep for manager to finish loading
    time.sleep(2)
    proc.next()
    return proc
def IOPrep(name):
    """Create, configure and start a ``larcv_threadio`` for a data set.

    Args:
        name: ``'Train'`` selects ``Train_cfg``, ``'Test'`` selects
            ``Test_cfg``.

    Returns:
        The started ``larcv_threadio`` instance, after its first ``next()``
        call.

    Raises:
        SystemExit: With exit status 1 if ``name`` is not recognised.
    """
    # Both data sets are started with the same value.
    b = 100

    if name == 'Train':
        cfg = Train_cfg
    elif name == 'Test':
        cfg = Test_cfg
    else:
        print('Bad name, check ImageType')
        # Still terminates the program as before, but with a failing exit
        # status and without relying on the `site`-provided quit() helper.
        raise SystemExit(1)

    proc = larcv_threadio()
    proc.configure(cfg)
    proc.start_manager(b)
    # Need sleep for manager to finish loading
    time.sleep(2)
    proc.next()
    return proc
def initialize(self):
    """Start the threaded input reader and, if requested, the output file.

    Builds the input configuration with ``make_input_larcv_cfg``, starts a
    ``larcv_threadio`` registered as ``'MainIO'`` and requests the first
    batch with entries and event ids stored. ``self._num_entries`` and
    ``self._num_channels`` are filled from the reader. When
    ``self._flags.OUTPUT_FILE`` is set, a ``larcv.IOManager`` is created
    from ``make_output_larcv_cfg`` and initialized.
    """
    from larcv import larcv
    from larcv.dataloader2 import larcv_threadio

    self._input_cfg = make_input_larcv_cfg(self._flags)
    reader_cfg = {
        'filler_name': 'MainIO',
        'verbosity': 0,
        'filler_cfg': self._input_cfg.name,
    }

    self._ihandler = larcv_threadio()
    reader = self._ihandler
    reader.configure(reader_cfg)
    reader.start_manager(self.batch_per_step())
    reader.next(store_entries=True, store_event_ids=True)
    self._next_counter = 0

    # The entry count is read through the reader's underlying processor.
    self._num_entries = reader._proc.pd().io().get_n_entries()
    # The channel count is the last element of the first data key's dim().
    first_batch = reader.fetch_data(self._flags.DATA_KEYS[0])
    self._num_channels = first_batch.dim()[-1]

    # The output file is optional.
    if not self._flags.OUTPUT_FILE:
        return
    self._output_cfg = make_output_larcv_cfg(self._flags)
    self._fout = larcv.IOManager(self._output_cfg.name)
    self._fout.initialize()
SAVE_WEIGHTS = 100

# The train/ and test/ log directories are created if missing and must be
# empty before the run starts.
train_logdir = os.path.join(LOGDIR, 'train')
test_logdir = os.path.join(LOGDIR, 'test')
for _logdir in (train_logdir, test_logdir):
    if not os.path.isdir(_logdir):
        os.makedirs(_logdir)
if os.listdir(train_logdir) or os.listdir(test_logdir):
    sys.stderr.write('Error: train or test log dir not empty...\n')
    raise OSError

#
# Step 0: IO
#
# "train" data set: create the io interface, configure it, start the read
# thread, then wait briefly before requesting the first batch.
train_io = larcv_threadio()
train_io_cfg = {
    'filler_name': 'TrainIO',
    'verbosity': 0,
    'filler_cfg': TRAIN_IO_CONFIG,
}
train_io.configure(train_io_cfg)
train_io.start_manager(TRAIN_BATCH_SIZE)
time.sleep(2)
train_io.next()

# "test" data set: create the io interface, configure it and start the
# read thread.
test_io = larcv_threadio()
test_io_cfg = {
    'filler_name': 'TestIO',
    'verbosity': 0,
    'filler_cfg': TEST_IO_CONFIG,
}
test_io.configure(test_io_cfg)
test_io.start_manager(TEST_BATCH_SIZE)
fname4 = 'inf4.csv' fpath = wpath + ppath + fname fpath2 = wpath + ppath + fname2 fpath3 = wpath + ppath + fname3 fpath4 = wpath + ppath + fname4 fpathC = fpath3 fnameC = fname3 #Check if inference already done if os.path.exists(fpathC) != True: print('File does not exist yet') #Set up IO tep = larcv_threadio() tep.configure(cfg) tep.start_manager(batchsize) time.sleep(2) tep.next(store_entries=True, store_event_ids=True) #Call the dimensions of the data tedim = tep.fetch_data('test_image').dim() ######## #Set input rawinput = tf.placeholder(tf.float32, [None, tedim[1] * tedim[2] * tedim[3]], name='raw') input2d = tf.reshape(rawinput, [-1, tedim[1], tedim[2], tedim[3]],
import ROOT
from larcv import larcv
from larcv.dataloader2 import larcv_threadio
import numpy

# Minimal larcv_threadio example: configure a reader from
# example_dataloader.cfg, fetch one batch and print the dimensions of the
# "image" and "label" data products.
proc = larcv_threadio()
filler_cfg = {
    "filler_name": "ThreadProcessor",
    "verbosity": 0,
    "filler_cfg": "example_dataloader.cfg",
}
proc.configure(filler_cfg)
proc.start_manager(10)
try:
    proc.next()
    img = proc.fetch_data("image")
    lbl = proc.fetch_data("label")
    # Parenthesised single-argument print behaves the same on Python 2
    # and Python 3.
    print(img.dim())
    print(lbl.dim())
finally:
    # Stop the manager even if fetching or printing fails.
    proc.stop_manager()
def initialize(self):
    """Start the configured data loaders, build the network and the session.

    For each of ``TRAIN_CONFIG``, ``TEST_CONFIG`` and ``ANA_CONFIG`` present
    in ``self._config``, a ``larcv_threadio`` is configured from the
    section's ``FILLER``, ``VERBOSITY`` and ``FILE`` entries, started with
    ``MINIBATCH_SIZE`` and registered in ``self._dataloaders`` under
    ``'train'``, ``'test'`` or ``'ana'``. All loaders are started first;
    each is then asked for its first batch. The data dimensions passed to
    the network come from the last configured stream (ana, else test, else
    train).

    A TensorFlow session, summary writer(s) and saver are then created.
    Variables are freshly initialized unless ``self._config['RESTORE']`` is
    true, in which case the latest checkpoint under
    ``<LOGDIR>/train/checkpoints/`` is restored.

    Raises:
        ValueError: If none of the three stream sections is configured, so
            the network input dimensions cannot be determined.
    """
    streams = [
        (section, name)
        for section, name in (('TRAIN_CONFIG', 'train'),
                              ('TEST_CONFIG', 'test'),
                              ('ANA_CONFIG', 'ana'))
        if section in self._config
    ]
    if not streams:
        # Without any stream there is no data to read dimensions from;
        # previously this surfaced later as a NameError on `dim_data`.
        raise ValueError(
            "No TRAIN_CONFIG, TEST_CONFIG or ANA_CONFIG section in the "
            "configuration; cannot determine network input dimensions.")

    # Prepare data managers:
    for section, name in streams:
        stream_cfg = self._config[section]
        io = larcv_threadio()
        io.configure({
            'filler_name': stream_cfg['FILLER'],
            'verbosity': stream_cfg['VERBOSITY'],
            'filler_cfg': stream_cfg['FILE'],
        })
        io.start_manager(self._config['MINIBATCH_SIZE'])
        self._dataloaders.update({name: io})

    # Start up the network: request the first batch from every loader.
    # Entries and event ids are only stored when not training.
    dim_data = None
    for section, name in streams:
        loader = self._dataloaders[name]
        loader.next(store_entries=(not self._config['TRAINING']),
                    store_event_ids=(not self._config['TRAINING']))
        dim_data = loader.fetch_data(
            self._config[section]['KEYWORD_DATA']).dim()

    # Net construction:
    self._net = resnet(self._config)
    self._net.construct_network(dims=dim_data)

    #
    # Network variable initialization
    #
    # Configure global process (session, summary, etc.)
    self._sess = tf.Session()
    self._writer = tf.summary.FileWriter(self._config['LOGDIR'] + '/train/')
    self._saver = tf.train.Saver()

    if 'TEST_CONFIG' in self._config:
        self._writer_test = tf.summary.FileWriter(
            self._config['LOGDIR'] + '/test/')

    if self._config['RESTORE']:
        latest_checkpoint = tf.train.latest_checkpoint(
            self._config['LOGDIR'] + "/train/checkpoints/")
        print("Restoring model from {}".format(latest_checkpoint))
        self._saver.restore(self._sess, latest_checkpoint)
    else:
        # Initialize variables
        self._sess.run(tf.global_variables_initializer())
        self._writer.add_graph(self._sess.graph)
def initialize(self):
    """Set up IO streams, build the network and prepare the TensorFlow session.

    Steps, in order:
      1. Seed TensorFlow with ``self._cfg.TF_RANDOM_SEED``.
      2. Start the main input stream, plus the optional test input stream
         and analysis output manager when they are configured.
      3. Request the first main batch and build ``uresnet`` from the
         dimensions of the ``KEYWORD_DATA`` product (first axis dropped).
      4. Create the session, run the global variable initializer, and
         create summary writers when ``LOGDIR`` is set.
      5. Create the checkpoint saver when ``SAVE_FILE`` is set.
      6. When ``LOAD_FILE`` is set, restore every model variable whose name
         is not listed in ``AVOID_LOAD_PARAMS``.

    Returns:
        ``None``. When ``self._cfg.MAIN_INPUT_CONFIG`` is not set, a
        message is printed and nothing else is done.
    """
    cfg = self._cfg

    # Instantiate and configure
    if not cfg.MAIN_INPUT_CONFIG:
        print('Must provide larcv data filler configuration file!')
        return

    # Set random seed for reproducibility
    tf.set_random_seed(cfg.TF_RANDOM_SEED)

    #
    # Data IO configuration
    #
    # Main input stream
    self._input_main = larcv_threadio()
    self._input_main.configure({
        'filler_name': 'MainIO',
        'verbosity': 0,
        'filler_cfg': cfg.MAIN_INPUT_CONFIG,
    })
    self._input_main.start_manager(cfg.MINIBATCH_SIZE)

    # Test input stream (optional)
    if cfg.TEST_INPUT_CONFIG:
        self._input_test = larcv_threadio()
        self._input_test.configure({
            'filler_name': 'TestIO',
            'verbosity': 0,
            'filler_cfg': cfg.TEST_INPUT_CONFIG,
        })
        self._input_test.start_manager(cfg.TEST_BATCH_SIZE)

    # Output stream (optional)
    if cfg.ANA_OUTPUT_CONFIG:
        self._output = larcv.IOManager(cfg.ANA_OUTPUT_CONFIG)
        self._output.initialize()

    #
    # Network construction
    #
    # Retrieve image/label dimensions
    self._input_main.next(store_entries=(not cfg.TRAIN),
                          store_event_ids=(not cfg.TRAIN))
    dim_data = self._input_main.fetch_data(cfg.KEYWORD_DATA).dim()
    self._net = uresnet(dims=dim_data[1:],
                        num_class=3,
                        base_num_outputs=cfg.BASE_NUM_FILTERS,
                        debug=False)
    # The learning rate is only passed when training.
    construct_args = {'trainable': cfg.TRAIN, 'use_weight': cfg.USE_WEIGHTS}
    if cfg.TRAIN:
        construct_args['learning_rate'] = cfg.LEARNING_RATE
    self._net.construct(**construct_args)

    #
    # Network variable initialization
    #
    # Configure global process (session, summary, etc.)
    # Initialize variables
    self._sess = tf.Session()
    self._sess.run(tf.global_variables_initializer())
    self._writer_train = None
    self._writer_test = None

    if cfg.LOGDIR:
        logdir = os.path.join(cfg.LOGDIR, 'train')
        if not os.path.isdir(logdir):
            os.makedirs(logdir)
        # Create a summary writer handle
        self._writer_train = tf.summary.FileWriter(logdir)
        self._writer_train.add_graph(self._sess.graph)
        # `_input_test` is only assigned above when a test stream is
        # configured, so read it defensively.
        if getattr(self, '_input_test', None):
            logdir = os.path.join(cfg.LOGDIR, 'test')
            if not os.path.isdir(logdir):
                os.makedirs(logdir)
            self._writer_test = tf.summary.FileWriter(logdir)
            self._writer_test.add_graph(self._sess.graph)

    if cfg.SAVE_FILE:
        # os.path.dirname returns '' for a bare file name; slicing up to
        # rfind('/') instead dropped the last character in that case and
        # created a bogus directory.
        save_dir = os.path.dirname(cfg.SAVE_FILE)
        if save_dir and not os.path.isdir(save_dir):
            os.makedirs(save_dir)
        # Create weights saver
        # NOTE(review): nesting reconstructed from flattened source — confirm
        # the saver is meant to be created only when SAVE_FILE is set.
        self._saver = tf.train.Saver(
            max_to_keep=cfg.CHECKPOINT_NMAX,
            keep_checkpoint_every_n_hours=cfg.CHECKPOINT_NHOUR)

    # Override variables if wished
    if cfg.LOAD_FILE:
        self._iteration = self.iteration_from_file_name(cfg.LOAD_FILE)
        vlist = []
        for v in tf.get_collection(tf.GraphKeys.MODEL_VARIABLES):
            if v.name in cfg.AVOID_LOAD_PARAMS:
                print('\033[91mSkipping\033[00m loading variable', v.name,
                      'from input weight...')
                continue
            print('\033[95mLoading\033[00m variable', v.name, 'from',
                  cfg.LOAD_FILE)
            vlist.append(v)
        reader = tf.train.Saver(var_list=vlist)
        reader.restore(self._sess, cfg.LOAD_FILE)

    self._batch_metrics = None
    self._descr_metrics = None
def initialize(self):
    """Start the configured data loaders, build the network and the session.

    For each of ``TRAIN_CONFIG``, ``TEST_CONFIG`` and ``ANA_CONFIG`` present
    in ``self._config`` (in that order), a ``larcv_threadio`` is configured
    from the section's ``FILLER``, ``VERBOSITY`` and ``FILE`` entries,
    started with ``MINIBATCH_SIZE``, registered in ``self._dataloaders``
    under ``'train'``, ``'test'`` or ``'ana'`` and asked for its first
    batch. The start-up time of each stream is written to stdout. The data
    dimensions passed to the network come from the last configured stream,
    or are ``None`` when no stream is configured.

    If ``ANA_CONFIG`` contains an ``OUTPUT`` entry, a ``larcv.IOManager`` is
    created from it and initialized.

    A TensorFlow session, summary writer(s) and saver are then created.
    Variables are freshly initialized unless ``self._config['RESTORE']`` is
    true, in which case the latest checkpoint under
    ``<LOGDIR>/train/checkpoints/`` is restored.
    """
    dim_data = None

    # Prepare data managers:
    for section, name in (('TRAIN_CONFIG', 'train'),
                          ('TEST_CONFIG', 'test'),
                          ('ANA_CONFIG', 'ana')):
        if section not in self._config:
            continue

        start = time.time()
        stream_cfg = self._config[section]
        io = larcv_threadio()
        io.configure({
            'filler_name': stream_cfg['FILLER'],
            'verbosity': stream_cfg['VERBOSITY'],
            'filler_cfg': stream_cfg['FILE'],
        })
        io.start_manager(self._config['MINIBATCH_SIZE'])
        self._dataloaders.update({name: io})

        # Entries and event ids are only stored when not training.
        self._dataloaders[name].next(
            store_entries=(not self._config['TRAINING']),
            store_event_ids=(not self._config['TRAINING']))
        dim_data = self._dataloaders[name].fetch_data(
            stream_cfg['KEYWORD_DATA']).dim()
        end = time.time()
        # '.2f' (fixed point): a bare '.2' means two significant digits and
        # switches to scientific notation for times of 10 s or more.
        sys.stdout.write("Time to start {0} IO: {1:.2f}s\n".format(
            name.upper(), end - start))

    # Output stream (optional)
    if ('ANA_CONFIG' in self._config
            and 'OUTPUT' in self._config['ANA_CONFIG']):
        print("Initializing output file")
        self._output = larcv.IOManager(self._config['ANA_CONFIG']['OUTPUT'])
        self._output.initialize()

    # Net construction:
    start = time.time()
    sys.stdout.write("Begin constructing network\n")
    self._net = uresnet(self._config)
    self._net.construct_network(dims=dim_data)
    end = time.time()
    sys.stdout.write(
        "Done constructing network. ({0:.2f}s)\n".format(end - start))

    #
    # Network variable initialization
    #
    # Configure global process (session, summary, etc.)
    self._sess = tf.Session()
    self._writer = tf.summary.FileWriter(self._config['LOGDIR'] + '/train/')
    self._saver = tf.train.Saver()

    if 'TEST_CONFIG' in self._config:
        self._writer_test = tf.summary.FileWriter(
            self._config['LOGDIR'] + '/test/')

    if self._config['RESTORE']:
        latest_checkpoint = tf.train.latest_checkpoint(
            self._config['LOGDIR'] + "/train/checkpoints/")
        print("Restoring model from {}".format(latest_checkpoint))
        self._saver.restore(self._sess, latest_checkpoint)
    else:
        # Initialize variables
        self._sess.run(tf.global_variables_initializer())
        self._writer.add_graph(self._sess.graph)