class SparseImagePyTorchDataset(torchdata.Dataset):
    idCounter = 0

    def __init__(self, inputfile, batchsize, tickbackward=False, nworkers=4):
        super(SparseImagePyTorchDataset, self).__init__()

        if type(inputfile) is str:
            self.inputfiles = [inputfile]
        elif type(inputfile) is list:
            self.inputfiles = inputfile

        # get length by querying the tree
        self.nentries = 0
        tchain = rt.TChain("image2d_wire_tree")
        for finput in self.inputfiles:
            tchain.Add(finput)
        self.nentries = tchain.GetEntries()
        del tchain

        self.feedername = "SparseImagePyTorchDataset_%d" % (SparseImagePyTorchDataset.idCounter)
        self.batchsize = batchsize
        self.nworkers = nworkers
        self.feeder = LArCVServer(self.batchsize, self.feedername,
                                  load_sparse_ssnetdata, self.inputfiles, self.nworkers,
                                  server_verbosity=0, worker_verbosity=0,
                                  io_tickbackward=tickbackward)
        SparseImagePyTorchDataset.idCounter += 1

    def __len__(self):
        return self.nentries

    def __getitem__(self, index):
        """the feeder serves entries in its own order; the index argument is ignored"""
        data = self.feeder.get_batch_dict()
        # remove the feeder variable from the returned dict
        del data["feeder"]
        return data
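# Usage sketch (an illustration, not from the source): because __getitem__
# ignores its index and returns whatever batch the LArCVServer feeder has
# ready, the dataset is typically iterated directly rather than through a
# sampler. The input file name below is hypothetical; the keys in the
# returned dict depend on what load_sparse_ssnetdata provides.
if __name__ == "__main__":
    dataset = SparseImagePyTorchDataset("larcv_input.root", batchsize=4, nworkers=2)
    print "dataset entries: ", len(dataset)
    for i in xrange(3):
        data = dataset[i]  # index is ignored; batches arrive in feeder order
        print "batch ", i, " keys: ", data.keys()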
class LArMatchDataset:

    def __init__(self, input_larcv_files, input_ana_files, npairs=20000, use_triplets=True):
        self.input_larcv_files = input_larcv_files
        self.input_ana_files = input_ana_files

        # load the chain of flow-match truth data
        self.match_v = std.vector("larflow::FlowMatchMap")()
        self.tchain = rt.TChain("flowmatchdata")
        for fin in input_ana_files:
            print "adding ana file: ", fin
            self.tchain.Add(fin)
        self.tchain.SetBranchAddress("matchmap", rt.AddressOf(self.match_v))
        print "chain has ", self.tchain.GetEntries(), " entries"

        self.params = {"has_truth": True,
                       "verbose": False,
                       "npairs": npairs,
                       "matchtree": self.tchain,
                       "match_v": self.match_v}
        self.nworkers = 1
        self.feeder = LArCVServer(1, "larmatchfeed",
                                  load_larmatch_data,
                                  self.input_larcv_files,
                                  self.nworkers,
                                  io_tickbackward=False,
                                  func_params=self.params)

    def __len__(self):
        return int(self.params["matchtree"].GetEntries())

    def gettensorbatch(self, batchsize, device):
        batches = []
        source_npts  = []
        target1_npts = []
        target2_npts = []
        pair1_npts   = []
        pair2_npts   = []
        source_tot  = 0
        target1_tot = 0
        target2_tot = 0
        pair1_tot   = 0
        pair2_tot   = 0

        # pull single-entry batches from the feeder and tally point counts
        for ibatch in range(batchsize):
            data = self.feeder.get_batch_dict()
            source_npts.append(data["coord_source"][0].shape[0])
            source_tot += source_npts[-1]
            target1_npts.append(data["coord_target1"][0].shape[0])
            target1_tot += target1_npts[-1]
            target2_npts.append(data["coord_target2"][0].shape[0])
            target2_tot += target2_npts[-1]
            pair1_npts.append(int(data["npairs_flow1"][0]))
            pair1_tot += pair1_npts[-1]
            pair2_npts.append(int(data["npairs_flow2"][0]))
            pair2_tot += pair2_npts[-1]
            batches.append(data)

        # allocate combined arrays covering the whole batch
        tdata = {"coord_source":  np.zeros((source_tot, 3),  dtype=np.int32),
                 "coord_target1": np.zeros((target1_tot, 3), dtype=np.int32),
                 "coord_target2": np.zeros((target2_tot, 3), dtype=np.int32),
                 "feat_source":   np.zeros((source_tot, 1),  dtype=np.float32),
                 "feat_target1":  np.zeros((target1_tot, 1), dtype=np.float32),
                 "feat_target2":  np.zeros((target2_tot, 1), dtype=np.float32),
                 "pairs_flow1": [],
                 "pairs_flow2": [],
                 "entries": [],
                 "npairs1": [],
                 "npairs2": []}

        # copy each entry's points into its slice of the combined arrays
        source_start  = 0
        target1_start = 0
        target2_start = 0
        for ibatch, data in enumerate(batches):
            source_end  = source_start  + source_npts[ibatch]
            target1_end = target1_start + target1_npts[ibatch]
            target2_end = target2_start + target2_npts[ibatch]
            tdata["coord_source"][source_start:source_end, 0:2]    = data["coord_source"][0][:, 0:2]
            tdata["coord_target1"][target1_start:target1_end, 0:2] = data["coord_target1"][0][:, 0:2]
            tdata["coord_target2"][target2_start:target2_end, 0:2] = data["coord_target2"][0][:, 0:2]
            tdata["feat_source"][source_start:source_end, 0]       = data["feat_source"][0][:]
            tdata["feat_target1"][target1_start:target1_end, 0]    = data["feat_target1"][0][:]
            tdata["feat_target2"][target2_start:target2_end, 0]    = data["feat_target2"][0][:]
            source_start  = source_end
            target1_start = target1_end
            target2_start = target2_end
            tdata["pairs_flow1"].append(torch.from_numpy(data["matchpairs_flow1"][0][0:pair1_npts[ibatch], :]).to(device))
            tdata["pairs_flow2"].append(torch.from_numpy(data["matchpairs_flow2"][0][0:pair2_npts[ibatch], :]).to(device))
            tdata["entries"].append(data["entry"][0])
            tdata["npairs1"].append(data["npairs_flow1"][0])
            tdata["npairs2"].append(data["npairs_flow2"][0])

        # convert the combined numpy arrays into torch tensors on the target device
        for name, arr_np in tdata.items():
            if type(arr_np) is np.ndarray:
                tdata[name] = torch.from_numpy(arr_np).to(device)

        return tdata
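# Usage sketch (illustrative; the ROOT file names are hypothetical):
# gettensorbatch concatenates several single-entry server batches into one set
# of tensors, so downstream sparse-convolution code receives the whole batch
# at once rather than entry by entry.
if __name__ == "__main__":
    larcv_files = ["larmatch_larcv.root"]
    ana_files   = ["larmatch_ana.root"]
    dataset = LArMatchDataset(larcv_files, ana_files, npairs=20000)
    device = torch.device("cpu")
    tdata = dataset.gettensorbatch(2, device)
    print "coord_source shape: ", tdata["coord_source"].shape
    print "npairs per entry (flow1): ", tdata["npairs1"]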
def main():

    global best_prec1
    global writer

    if GPUMODE:
        DEVICE = torch.device("cuda:%d"%(DEVICE_IDS[0]))
    else:
        DEVICE = torch.device("cpu")

    # create model, mark it to run on the GPU
    model = LArFlowUResNet(num_classes=2, input_channels=1,
                           layer_channels=[16,32,64,128],
                           layer_strides=[2,2,2,2],
                           num_final_features=64,
                           use_deconvtranspose=False,
                           onlyone_res=True,
                           showsizes=False,
                           use_visi=False,
                           use_grad_checkpoints=True,
                           gpuid1=DEVICE_IDS[0],
                           gpuid2=DEVICE_IDS[0])

    # Resume training option
    if RESUME_FROM_CHECKPOINT:
        print "RESUMING FROM CHECKPOINT FILE ",CHECKPOINT_FILE
        checkpoint = torch.load( CHECKPOINT_FILE, map_location=CHECKPOINT_MAP_LOCATIONS ) # load weights to gpuid
        best_prec1 = checkpoint["best_prec1"]
        if CHECKPOINT_FROM_DATA_PARALLEL:
            model = nn.DataParallel( model, device_ids=DEVICE_IDS ) # distribute across device_ids
        model.load_state_dict(checkpoint["state_dict"])

    if not CHECKPOINT_FROM_DATA_PARALLEL and len(DEVICE_IDS)>1:
        model = nn.DataParallel( model, device_ids=DEVICE_IDS ).to(device=DEVICE) # distribute across device_ids
    else:
        model = model.to(device=DEVICE)

    # uncomment to dump model
    if False:
        print "Loaded model: ",model
        return

    # define loss function (criterion) and optimizer
    maxdist = 200.0
    criterion = LArFlowCombinedLoss(IMAGE_WIDTH, IMAGE_HEIGHT, BATCHSIZE, maxdist,
                                    VISI_WEIGHT, CONSISTENCY_WEIGHT).to(device=DEVICE)

    # training parameters
    lr = 1.0e-3
    momentum = 0.9
    weight_decay = 1.0e-4

    # training length
    batchsize_train = BATCHSIZE
    batchsize_valid = BATCHSIZE_VALID
    start_epoch = 0
    start_iter = 0   # starting iteration for the main loop
    epochs = 10
    num_iters = 10000
    iter_per_epoch = None # determined later
    iter_per_valid = 10

    nbatches_per_itertrain = 20
    itersize_train = batchsize_train*nbatches_per_itertrain
    trainbatches_per_print = -1

    nbatches_per_itervalid = 40
    itersize_valid = batchsize_valid*nbatches_per_itervalid
    validbatches_per_print = -1

    # SETUP OPTIMIZER

    # SGD w/ momentum
    #optimizer = torch.optim.SGD(model.parameters(), lr,
    #                            momentum=momentum,
    #                            weight_decay=weight_decay)

    # ADAM
    # betas default: (0.9, 0.999), the smoothing coefficients for the
    # gradient and its squared magnitude
    #optimizer = torch.optim.Adam(model.parameters(),
    #                             lr=lr,
    #                             weight_decay=weight_decay)

    # RMSPROP
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=lr,
                                    weight_decay=weight_decay)

    # let cudnn optimize algorithms based on input size (good if input size is constant)
    cudnn.benchmark = True

    # LOAD THE DATASET
    iotrain = LArCVServer(batchsize_train,"train",load_data,INPUTFILE,6)
    iovalid = LArCVServer(batchsize_valid,"valid",load_data,INPUTFILE,2)

    print "pause to give time to feeders"

    #NENTRIES = len(iotrain)
    NENTRIES = 100000
    print "Number of entries in training set: ",NENTRIES

    if NENTRIES>0:
        iter_per_epoch = NENTRIES/(itersize_train)
        if num_iters is None:
            # set the number of iterations from the requested number of epochs
            num_iters = (epochs-start_epoch)*NENTRIES
        else:
            epochs = num_iters/NENTRIES
    else:
        iter_per_epoch = 1

    print "Number of epochs: ",epochs
    print "Iter per epoch: ",iter_per_epoch

    if False:
        # for debugging/testing data
        sample = "train"
        iosample = {"valid":iovalid,
                    "train":iotrain}
        print "TEST BATCH: sample=",sample
        source,target1,target2,flow1,flow2,visi1,visi2,fvisi1,fvisi2,Wminx,Uminx,Vminx = prep_data( iosample[sample], batchsize_train,
                                                                                                    IMAGE_WIDTH, IMAGE_HEIGHT, ADC_THRESH, DEVICE )

        # dump test images with OpenCV
        print "Print using OpenCV"
        import cv2 as cv
        cv.imwrite( "testout_source.png",  source.cpu().numpy()[0,0,:,:] )
        cv.imwrite( "testout_target1.png", target1.cpu().numpy()[0,0,:,:] )
        cv.imwrite( "testout_target2.png", target2.cpu().numpy()[0,0,:,:] )
        print "source shape: ",source.cpu().numpy().shape
        print "minX-src: ",Wminx
        print "minX-U: ",Uminx
        print "minX-V: ",Vminx

        sample = "valid"
        print "TEST BATCH: sample=",sample
        source,target1,target2,flow1,flow2,visi1,visi2,fvisi1,fvisi2,Wminx,Uminx,Vminx = prep_data( iosample[sample], batchsize_valid,
                                                                                                    IMAGE_WIDTH, IMAGE_HEIGHT, ADC_THRESH, DEVICE )

        print "STOP FOR DEBUGGING"
        sys.exit(-1)

    with torch.autograd.profiler.profile(enabled=RUNPROFILER) as prof:

        for ii in range(start_iter, num_iters):

            adjust_learning_rate(optimizer, ii, lr)
            print "MainLoop Iter:%d Epoch:%d.%d "%(ii,ii/iter_per_epoch,ii%iter_per_epoch),
            for param_group in optimizer.param_groups:
                print "lr=%.3e"%(param_group['lr']),
            print

            # train for one iteration
            try:
                _ = train(iotrain, DEVICE, batchsize_train,
                          model, criterion, optimizer,
                          nbatches_per_itertrain, ii, trainbatches_per_print)
            except Exception,e:
                print "Error in training routine!"
                print e.message
                print e.__class__.__name__
                traceback.print_exc()
                break

            # evaluate on validation set
            if ii%iter_per_valid==0:
                try:
                    totloss, flow1acc5, flow2acc5 = validate(iovalid, DEVICE, batchsize_valid,
                                                             model, criterion,
                                                             nbatches_per_itervalid, ii,
                                                             validbatches_per_print)
                except Exception,e:
                    print "Error in validation routine!"
                    print e.message
                    print e.__class__.__name__
                    traceback.print_exc()
                    break

                # remember best prec@1 and save checkpoint
                prec1 = 0.5*(flow1acc5+flow2acc5)
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)

                # checkpoint the best model
                if is_best:
                    print "Saving best model"
                    save_checkpoint({
                        'iter':ii,
                        'epoch': ii/iter_per_epoch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer' : optimizer.state_dict(),
                    }, is_best, -1)

            # periodic checkpoint
            if ii>0 and ii%ITER_PER_CHECKPOINT==0:
                print "saving periodic checkpoint"
                save_checkpoint({
                    'iter':ii,
                    'epoch': ii/iter_per_epoch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer' : optimizer.state_dict(),
                }, False, ii)

            # flush the print buffer after each iteration
            sys.stdout.flush()
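# The main loop above calls adjust_learning_rate(optimizer, ii, lr), but that
# helper is not shown. A minimal sketch of a conventional step-decay schedule
# follows; the actual schedule used in this training script is an assumption
# here, as are the decay_factor and iters_per_decay values.
def adjust_learning_rate(optimizer, iteration, base_lr, decay_factor=0.5, iters_per_decay=5000):
    """scale base_lr down by decay_factor every iters_per_decay iterations (hypothetical schedule)"""
    lr = base_lr * (decay_factor ** (iteration // iters_per_decay))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr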
class SparseInfillPyTorchDataset(torchdata.Dataset):
    idCounter = 0

    def __init__(self, inputfile, batchsize, input_producer_name, true_producer_name,
                 plane, tickbackward=False, nworkers=4,
                 readonly_products=None, feedername=None):
        super(SparseInfillPyTorchDataset, self).__init__()

        if type(inputfile) is str:
            self.inputfiles = [inputfile]
        elif type(inputfile) is list:
            self.inputfiles = inputfile

        if type(input_producer_name) is not str:
            raise ValueError("input_producer_name type must be str")

        # get length by querying the tree
        self.nentries = 0
        tchain = rt.TChain("sparseimg_{}_tree".format(input_producer_name))
        for finput in self.inputfiles:
            tchain.Add(finput)
        self.nentries = tchain.GetEntries()
        del tchain

        if feedername is None:
            self.feedername = "SparseInfillPyTorchDataset_%d" % (SparseInfillPyTorchDataset.idCounter)
        else:
            self.feedername = feedername
        self.batchsize = batchsize
        self.nworkers = nworkers
        params = {"inputproducer": input_producer_name,
                  "trueproducer":  true_producer_name,
                  "plane":         plane}

        # note: with the way LArCVServer workers operate, we must always use a
        # server batch size of 1. larcvserver expects entries in each batch to
        # be the same size, which is not true for sparse representations, so we
        # assemble batches ourselves for the sparse-convolution operations.
        self.feeder = LArCVServer(1, self.feedername,
                                  load_cropped_sparse_infill,
                                  self.inputfiles, self.nworkers,
                                  server_verbosity=-1, worker_verbosity=-1,
                                  io_tickbackward=tickbackward,
                                  func_params=params)

        SparseInfillPyTorchDataset.idCounter += 1

    def __len__(self):
        return self.nentries

    def __getitem__(self, index):
        """the feeder serves entries in its own order; the index argument is ignored"""
        data = self.feeder.get_batch_dict()
        # remove the feeder variable from the returned dict
        del data["feeder"]
        return data

    def get_tensor_batch(self, device):
        """
        get batch, convert into torch tensors

        inputs
        ------
        device: torch.device specifies either gpu or cpu

        output
        ------
        data [dict of torch tensors]
        """
        datalen = []   # length of each sparse data instance
        ncoords = 0    # total number of points over all batch entries

        # first collect data, retrying the feeder a few times if an entry comes back empty
        data_v = []
        for ibatch in xrange(self.batchsize):
            batch = None
            ntries = 0
            while batch is None and ntries < 10:
                batch = self.feeder.get_batch_dict()
                ntries += 1
            if batch is not None:
                data_v.append(batch)

        # tally the total points over all sparse image instances
        for data in data_v:
            datalen.append(data["ADCMasked"][0].shape[0])
            ncoords += datalen[-1]

        # truth is assumed present in the current files
        has_truth = True

        # tensor for coords (row,col,batch)
        coord_t = torch.zeros((ncoords, 3), dtype=torch.int).to(device)
        # tensor for input pixels
        input_t = torch.zeros((ncoords, 1), dtype=torch.float).to(device)
        # tensor for true values
        if has_truth:
            truth_t = torch.zeros((ncoords, 1), dtype=torch.float).to(device)
        else:
            truth_t = None

        # fill the tensors above
        nfilled = 0
        for ib, batch in enumerate(data_v):
            srcpix = batch["ADCMasked"][0]
            start = nfilled
            end = nfilled + datalen[ib]
            coord_t[start:end, 0:2] = torch.from_numpy(srcpix[:, 0:2].astype(np.int))
            coord_t[start:end, 2] = ib
            input_t[start:end, 0] = torch.from_numpy(srcpix[:, 2])
            if has_truth:
                truepix = batch["ADC"][0]
                truth_t[start:end, 0] = torch.from_numpy(truepix[:, 2])
            nfilled += datalen[ib]

        flowdata = {"coord": coord_t, "ADCMasked": input_t, "ADC": truth_t}
        return flowdata
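# Usage sketch (illustrative): the returned dict carries one coordinate tensor
# with the batch index in column 2, the layout expected by sparse-convolution
# libraries such as SparseConvNet. The file and producer names below are
# hypothetical.
if __name__ == "__main__":
    dataset = SparseInfillPyTorchDataset("sparseinfill_input.root", batchsize=4,
                                         input_producer_name="sparsecropped",
                                         true_producer_name="sparsecroppedtrue",
                                         plane=2, nworkers=2)
    flowdata = dataset.get_tensor_batch(torch.device("cpu"))
    print "coord: ", flowdata["coord"].shape, " input: ", flowdata["ADCMasked"].shape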
    # tail of load_data: record each plane's image meta (x,y bounds)
    for ip in xrange(0,3):
        data["meta"][ip,0,0] = ev_adc.as_vector()[ip].meta().min_x()
        data["meta"][ip,0,1] = ev_adc.as_vector()[ip].meta().min_y()
        data["meta"][ip,0,2] = ev_adc.as_vector()[ip].meta().max_x()
        data["meta"][ip,0,3] = ev_adc.as_vector()[ip].meta().max_y()

    return data


if __name__ == "__main__":

    batchsize = 4
    nworkers = 4
    print "start feeders"
    inputfile = "../testdata/smallsample/larcv_dlcosmictag_5482426_95_smallsample082918.root"
    feeder = LArCVServer(batchsize, "test", load_data, inputfile, nworkers,
                         server_verbosity=0, worker_verbosity=0)

    print "wait for workers to load up"
    twait = 3
    while twait > 0:
        time.sleep(0.5)
        twait -= 1
        print "twait: ", twait

    print "start receiving"
    nentries = 50
    tstart = time.time()
    for n in xrange(nentries):
        batch = feeder.get_batch_dict()
        print "entry[", n, "] from ", batch["feeder"], ": ", batch.keys()
    tend = time.time() - tstart
    print "received %d batches in %.2f secs (%.3f secs/batch)" % (nentries, tend, tend/nentries)
def load_ssnet_larcvdata(name, inputfile, batchsize, nworkers, tickbackward=False):
    """create a LArCVServer feeder that serves sparse ssnet training data"""
    feeder = LArCVServer(batchsize, name, load_sparse_ssnetdata, inputfile, nworkers,
                         server_verbosity=2, worker_verbosity=2,
                         io_tickbackward=tickbackward)
    return feeder
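# Usage sketch (illustrative): the factory hides the LArCVServer construction;
# batches are then pulled with get_batch_dict as elsewhere in these modules.
# The input file name is hypothetical.
if __name__ == "__main__":
    feeder = load_ssnet_larcvdata("ssnettest", "larcv_ssnet_input.root",
                                  batchsize=2, nworkers=2)
    batch = feeder.get_batch_dict()
    print "batch keys: ", batch.keys()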