Code Example #1
class SparseImagePyTorchDataset(torchdata.Dataset):
    idCounter = 0
    def __init__(self,inputfile,batchsize,tickbackward=False,nworkers=4):
        super(SparseImagePyTorchDataset,self).__init__()

        if isinstance(inputfile, str):
            self.inputfiles = [inputfile]
        elif isinstance(inputfile, list):
            self.inputfiles = inputfile
        else:
            raise ValueError("inputfile must be a str or a list of str")
        
        # get the number of entries by querying the tree
        tchain = rt.TChain("image2d_wire_tree")
        for finput in self.inputfiles:
            tchain.Add(finput)
        self.nentries = tchain.GetEntries()
        del tchain
        
        self.feedername = "SparseImagePyTorchDataset_%d"%(SparseImagePyTorchDataset.idCounter)
        self.batchsize = batchsize
        self.nworkers  = nworkers
        self.feeder = LArCVServer(self.batchsize,self.feedername,
                                  load_sparse_ssnetdata,self.inputfiles,self.nworkers,
                                  server_verbosity=0,worker_verbosity=0,
                                  io_tickbackward=tickbackward)
        SparseImagePyTorchDataset.idCounter += 1

    def __len__(self):
        return self.nentries

    def __getitem__(self, index):
        """The index is ignored: the LArCVServer workers deliver entries in their own order."""
        data = self.feeder.get_batch_dict()
        # drop the feeder name entry; callers only need the event data
        del data["feeder"]
        return data
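A minimal usage sketch for the class above, assuming the imports the snippet relies on (torch.utils.data as torchdata, ROOT as rt, plus LArCVServer and load_sparse_ssnetdata from the source project); the file name is a hypothetical placeholder:

# hypothetical input file for illustration
dataset = SparseImagePyTorchDataset("sparse_ssnet_data.root", batchsize=2, nworkers=2)
print("entries:", len(dataset))
data = dataset[0]   # the index is ignored; the workers choose the entries
print("batch keys:", list(data.keys()))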
Code Example #2
class LArMatchDataset:

    def __init__( self, input_larcv_files, input_ana_files, npairs=20000, use_triplets=True ):

        self.input_larcv_files = input_larcv_files
        self.input_ana_files   = input_ana_files
        
        # load chain
        self.match_v = std.vector("larflow::FlowMatchMap")()
        self.tchain = rt.TChain("flowmatchdata")
        for fin in input_ana_files:
            print("adding ana file: ", fin)
            self.tchain.Add( fin )
        self.tchain.SetBranchAddress( "matchmap", rt.AddressOf(self.match_v))
        print("chain has ", self.tchain.GetEntries(), " entries")

        self.params = {"has_truth":True,
                       "verbose":False,
                       "npairs":npairs,
                       "matchtree":self.tchain,
                       "match_v":self.match_v}
        
        self.nworkers = 1
        self.feeder = LArCVServer(1,"larmatchfeed",
                                  load_larmatch_data,
                                  self.input_larcv_files,
                                  self.nworkers,
                                  io_tickbackward=False,
                                  func_params=self.params)

    def __len__(self):
        return int(self.params["matchtree"].GetEntries())

    def gettensorbatch(self, batchsize, device):
        """Pull `batchsize` entries from the feeder and concatenate their sparse
        coordinate/feature arrays into torch tensors on `device`."""

        batches = []
        source_npts  = []
        target1_npts = []
        target2_npts = []
        pair1_npts   = []
        pair2_npts   = []
        source_tot = 0
        target1_tot = 0
        target2_tot = 0
        pair1_tot   = 0
        pair2_tot   = 0
        # first pass: pull entries and tally the point counts per entry
        for ibatch in range(batchsize):
            data = self.feeder.get_batch_dict()

            source_npts.append(  data["coord_source"][0].shape[0] )
            source_tot += source_npts[-1]
            
            target1_npts.append( data["coord_target1"][0].shape[0] )
            target1_tot += target1_npts[-1]

            target2_npts.append( data["coord_target2"][0].shape[0] )
            target2_tot += target2_npts[-1]

            pair1_npts.append(  int(data["npairs_flow1"][0]) )
            pair1_tot += pair1_npts[-1]

            pair2_npts.append(  int(data["npairs_flow2"][0]) )
            pair2_tot += pair2_npts[-1]            

            batches.append( data )

        tdata = {"coord_source": np.zeros( (source_tot,3),  dtype=np.int32 ),
                 "coord_target1":np.zeros( (target1_tot,3), dtype=np.int32 ),
                 "coord_target2":np.zeros( (target2_tot,3), dtype=np.int32 ),
                 "feat_source":  np.zeros( (source_tot,1),  dtype=np.float32 ),
                 "feat_target1": np.zeros( (target1_tot,1), dtype=np.float32 ),
                 "feat_target2": np.zeros( (target2_tot,1), dtype=np.float32 ),
                 "pairs_flow1":  [],  
                 "pairs_flow2":  [],  
                 "entries":[],
                 "npairs1":[],
                 "npairs2":[]
                 }

        # second pass: copy each entry's arrays into its slice of the concatenated tensors
        source_start  = 0
        target1_start = 0
        target2_start = 0
        npair1_start = 0
        npair2_start = 0
        for ibatch,data in enumerate(batches):
            source_end  = source_start  + source_npts[ibatch]
            target1_end = target1_start + target1_npts[ibatch]
            target2_end = target2_start + target2_npts[ibatch]
            tdata["coord_source"][source_start:source_end,0:2]    = data["coord_source"][0][:,0:2]
            tdata["coord_target1"][target1_start:target1_end,0:2] = data["coord_target1"][0][:,0:2]
            tdata["coord_target2"][target2_start:target2_end,0:2] = data["coord_target2"][0][:,0:2]
            tdata["feat_source"][source_start:source_end,0]     = data["feat_source"][0][:]
            tdata["feat_target1"][target1_start:target1_end,0]  = data["feat_target1"][0][:]
            tdata["feat_target2"][target2_start:target2_end,0]  = data["feat_target2"][0][:]
            source_start  = source_end
            target1_start = target1_end
            target2_start = target2_end

            tdata["pairs_flow1"].append( torch.from_numpy(data["matchpairs_flow1"][0][0:pair1_npts[ibatch],:]).to(device) )
            tdata["pairs_flow2"].append( torch.from_numpy(data["matchpairs_flow2"][0][0:pair2_npts[ibatch],:]).to(device) )
            tdata["entries"].append( data["entry"][0] )
            tdata["npairs1"].append( data["npairs_flow1"][0] )
            tdata["npairs2"].append( data["npairs_flow2"][0] )
            
        for name,arr_np in tdata.items():
            if type(arr_np) is np.ndarray:
                tdata[name] = torch.from_numpy( arr_np ).to(device)
            
        return tdata
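A minimal sketch of pulling one concatenated tensor batch from the class above, assuming torch, numpy, and the project imports used by the snippet; the file lists are hypothetical placeholders:

# hypothetical input files for illustration
larcv_files = ["larmatch_larcv.root"]
ana_files   = ["larmatch_ana.root"]
io = LArMatchDataset(larcv_files, ana_files, npairs=20000)
tdata = io.gettensorbatch(batchsize=2, device=torch.device("cpu"))
print(tdata["coord_source"].shape, tdata["feat_source"].shape, len(tdata["pairs_flow1"]))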
Code Example #3
def main():

    global best_prec1
    global writer

    if GPUMODE:
        DEVICE = torch.device("cuda:%d"%(DEVICE_IDS[0]))
    else:
        DEVICE = torch.device("cpu")
    
    # create model, mark it to run on the GPU
    model = LArFlowUResNet( num_classes=2, input_channels=1,
                            layer_channels=[16,32,64,128],
                            layer_strides= [ 2, 2, 2,  2],
                            num_final_features=64,
                            use_deconvtranspose=False,
                            onlyone_res=True,
                            showsizes=False,
                            use_visi=False,
                            use_grad_checkpoints=True,
                            gpuid1=DEVICE_IDS[0],
                            gpuid2=DEVICE_IDS[0] )
    
    # Resume training option
    if RESUME_FROM_CHECKPOINT:
        print "RESUMING FROM CHECKPOINT FILE ",CHECKPOINT_FILE
        checkpoint = torch.load( CHECKPOINT_FILE, map_location=CHECKPOINT_MAP_LOCATIONS ) # load weights to gpuid
        best_prec1 = checkpoint["best_prec1"]
        if CHECKPOINT_FROM_DATA_PARALLEL:
            model = nn.DataParallel( model, device_ids=DEVICE_IDS ) # distribute across device_ids
        model.load_state_dict(checkpoint["state_dict"])

    if not CHECKPOINT_FROM_DATA_PARALLEL and len(DEVICE_IDS)>1:
        model = nn.DataParallel( model, device_ids=DEVICE_IDS ).to(device=DEVICE) # distribute across device_ids
    else:
        model = model.to(device=DEVICE)

    # set to True to dump the model structure and exit
    if False:
        print("Loaded model: ", model)
        return

    # define loss function (criterion) and optimizer
    maxdist = 200.0
    criterion = LArFlowCombinedLoss(IMAGE_WIDTH,IMAGE_HEIGHT,BATCHSIZE,maxdist,
                                    VISI_WEIGHT,CONSISTENCY_WEIGHT).to(device=DEVICE)

    # training parameters
    lr = 1.0e-3
    momentum = 0.9
    weight_decay = 1.0e-4

    # training length
    batchsize_train = BATCHSIZE
    batchsize_valid = BATCHSIZE_VALID  # *len(DEVICE_IDS)
    start_epoch = 0
    start_iter  = 0   # assumed start; the original script presumably sets this elsewhere (nonzero when resuming)
    epochs      = 10
    num_iters   = 10000
    iter_per_epoch = None # determined later
    iter_per_valid = 10


    nbatches_per_itertrain = 20
    itersize_train         = batchsize_train*nbatches_per_itertrain
    trainbatches_per_print = -1
    
    nbatches_per_itervalid = 40
    itersize_valid         = batchsize_valid*nbatches_per_itervalid
    validbatches_per_print = -1

    # SETUP OPTIMIZER

    # SGD w/ momentum
    #optimizer = torch.optim.SGD(model.parameters(), lr,
    #                            momentum=momentum,
    #                            weight_decay=weight_decay)
    
    # ADAM
    # betas default: (0.9, 0.999) for (grad, grad^2). smoothing coefficient for grad. magnitude calc.
    #optimizer = torch.optim.Adam(model.parameters(), 
    #                             lr=lr, 
    #                             weight_decay=weight_decay)
    # RMSPROP
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=lr,
                                    weight_decay=weight_decay)
    
    # optimize algorithms based on input size (good if input size is constant)
    cudnn.benchmark = True

    # LOAD THE DATASET    
    iotrain = LArCVServer(batchsize_train,"train",load_data,INPUTFILE,6)
    iovalid = LArCVServer(batchsize_valid,"valid",load_data,INPUTFILE,2)

    print "pause to give time to feeders"

    #NENTRIES = len(iotrain)
    NENTRIES = 100000
    print "Number of entries in training set: ",NENTRIES

    if NENTRIES>0:
        iter_per_epoch = NENTRIES/(itersize_train)
        if num_iters is None:
            # we set it by the number of request epochs
            num_iters = (epochs-start_epoch)*NENTRIES
        else:
            epochs = num_iters/NENTRIES
    else:
        iter_per_epoch = 1

    print "Number of epochs: ",epochs
    print "Iter per epoch: ",iter_per_epoch

    
    if False:
        # for debugging/testing data
        sample = "train"
        iosample = {"valid": iovalid,
                    "train": iotrain}
        print("TEST BATCH: sample=", sample)
        source,target1,target2,flow1,flow2,visi1,visi2,fvisi1,fvisi2,Wminx,Uminx,Vminx = prep_data( iosample[sample], batchsize_train,
                                                                                                    IMAGE_WIDTH, IMAGE_HEIGHT, ADC_THRESH, DEVICE )
        # dump the images with OpenCV for inspection
        print("Dumping images using OpenCV")
        import cv2 as cv
        cv.imwrite( "testout_source.png",  source.cpu().numpy()[0,0,:,:] )
        cv.imwrite( "testout_target1.png", target1.cpu().numpy()[0,0,:,:] )
        cv.imwrite( "testout_target2.png", target2.cpu().numpy()[0,0,:,:] )
        print("source shape: ", source.cpu().numpy().shape)
        print("minX-src: ", Wminx)
        print("minX-U: ", Uminx)
        print("minX-V: ", Vminx)

        sample = "valid"
        print("TEST BATCH: sample=", sample)
        source,target1,target2,flow1,flow2,visi1,visi2,fvisi1,fvisi2,Wminx,Uminx,Vminx = prep_data( iosample[sample], batchsize_valid,
                                                                                                    IMAGE_WIDTH, IMAGE_HEIGHT, ADC_THRESH, DEVICE )

        print("STOP FOR DEBUGGING")
        sys.exit(-1)

    with torch.autograd.profiler.profile(enabled=RUNPROFILER) as prof:

        # Resume training option
        #if RESUME_FROM_CHECKPOINT:
        #    print "RESUMING FROM CHECKPOINT FILE ",CHECKPOINT_FILE
        #    checkpoint = torch.load( CHECKPOINT_FILE, map_location=CHECKPOINT_MAP_LOCATIONS )
        #    best_prec1 = checkpoint["best_prec1"]
        #    model.load_state_dict(checkpoint["state_dict"])
        #optimizer.load_state_dict(checkpoint['optimizer'])
        #if GPUMODE:
        #    optimizer.cuda(GPUID)

        for ii in range(start_iter, num_iters):

            adjust_learning_rate(optimizer, ii, lr)
            print("MainLoop Iter:%d Epoch:%d.%d " % (ii, ii // iter_per_epoch, ii % iter_per_epoch), end="")
            for param_group in optimizer.param_groups:
                print("lr=%.3e" % (param_group['lr']))

            # train for one iteration
            try:
                _ = train(iotrain, DEVICE, batchsize_train, model,
                          criterion, optimizer,
                          nbatches_per_itertrain, ii, trainbatches_per_print)

            except Exception as e:
                print("Error in training routine!")
                print(str(e))
                print(e.__class__.__name__)
                traceback.print_exc()
                break

            # evaluate on validation set
            if ii % iter_per_valid == 0:
                try:
                    totloss, flow1acc5, flow2acc5 = validate(iovalid, DEVICE, batchsize_valid, model, criterion,
                                                             nbatches_per_itervalid, ii, validbatches_per_print)
                except Exception as e:
                    print("Error in validation routine!")
                    print(str(e))
                    print(e.__class__.__name__)
                    traceback.print_exc()
                    break

                # remember best prec@1 and save checkpoint
                prec1   = 0.5*(flow1acc5+flow2acc5)
                is_best =  prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)

                # checkpoint the best model so far
                if is_best:
                    print("Saving best model")
                    save_checkpoint({
                        'iter': ii,
                        'epoch': ii // iter_per_epoch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best, -1)

            # periodic checkpoint
            if ii > 0 and ii % ITER_PER_CHECKPOINT == 0:
                print("saving periodic checkpoint")
                save_checkpoint({
                    'iter': ii,
                    'epoch': ii // iter_per_epoch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, False, ii)
            # flush the print buffer after iteration
            sys.stdout.flush()
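main() relies on an adjust_learning_rate helper defined elsewhere in the training script. A plausible step-decay sketch is below; the decay factor and period are illustrative assumptions, not the project's actual schedule:

def adjust_learning_rate(optimizer, iteration, base_lr):
    # illustrative schedule: cut the learning rate by 10x every 3000 iterations (assumed values)
    lr = base_lr * (0.1 ** (iteration // 3000))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr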
Code Example #4
class SparseInfillPyTorchDataset(torchdata.Dataset):
    idCounter = 0

    def __init__(self,
                 inputfile,
                 batchsize,
                 input_producer_name,
                 true_producer_name,
                 plane,
                 tickbackward=False,
                 nworkers=4,
                 readonly_products=None,
                 feedername=None):
        super(SparseInfillPyTorchDataset, self).__init__()

        if isinstance(inputfile, str):
            self.inputfiles = [inputfile]
        elif isinstance(inputfile, list):
            self.inputfiles = inputfile
        else:
            raise ValueError("inputfile must be a str or a list of str")

        if not isinstance(input_producer_name, str):
            raise ValueError("input_producer_name must be a str")

        # get the number of entries by querying the tree
        tchain = rt.TChain("sparseimg_{}_tree".format(input_producer_name))
        for finput in self.inputfiles:
            tchain.Add(finput)
        self.nentries = tchain.GetEntries()
        del tchain

        if feedername is None:
            self.feedername = "SparseInfillImagePyTorchDataset_%d"%\
                                (SparseInfillPyTorchDataset.idCounter)
        else:
            self.feedername = feedername
        self.batchsize = batchsize
        self.nworkers = nworkers
        # note: the readonly_products argument is accepted but currently unused
        params = {
            "inputproducer": input_producer_name,
            "trueproducer": true_producer_name,
            "plane": plane
        }

        # note: with the way LArCVServer batches worker output, we must always
        #   use a batch size of 1, because larcvserver expects the entries in a
        #   batch to be the same size, which is not true for sparse representations.
        # we assemble batches ourselves for the sparse-convolution operations
        self.feeder = LArCVServer(1,
                                  self.feedername,
                                  load_cropped_sparse_infill,
                                  self.inputfiles,
                                  self.nworkers,
                                  server_verbosity=-1,
                                  worker_verbosity=-1,
                                  io_tickbackward=tickbackward,
                                  func_params=params)

        SparseInfillPyTorchDataset.idCounter += 1

    def __len__(self):
        return self.nentries

    def __getitem__(self, index):
        """The index is ignored: the LArCVServer workers deliver entries in their own order."""
        data = self.feeder.get_batch_dict()
        # drop the feeder name entry; callers only need the event data
        del data["feeder"]
        return data

    def get_tensor_batch(self, device):
        """
        Get a batch and convert it into torch tensors.

        inputs
        ------
        device: torch.device specifying either gpu or cpu

        output
        ------
        data: dict of torch tensors
        """

        # we will fill this dict to return with the batch
        datalen = []  # store the length of each sparse data instance
        ncoords = 0   # total number of points over the whole batch

        # first collect the data; retry a few times if a worker has nothing ready yet
        data_v = []
        for ibatch in range(self.batchsize):
            batch = None
            ntries = 0
            while batch is None and ntries < 10:
                batch = self.feeder.get_batch_dict()
                ntries += 1
            if batch is not None:
                data_v.append(batch)

        # now tally the total number of points over the sparse image instances
        for data in data_v:
            datalen.append(data["ADCMasked"][0].shape[0])
            ncoords += datalen[-1]

        # truth labels are assumed present; the commented check below shows how
        # their absence could be detected instead
        # if len(data_v)>0 and data_v[0]["ADC"][0] is not None:
        #     has_truth = True
        # else:
        #     has_truth = False
        has_truth = True

        # make tensor for coords (row,col,batch)
        coord_t = torch.zeros((ncoords, 3), dtype=torch.int).to(device)

        # tensor for input pixel values
        input_t = torch.zeros((ncoords, 1), dtype=torch.float).to(device)

        # tensor for true values
        if has_truth:
            truth_t = torch.zeros((ncoords, 1), dtype=torch.float).to(device)
        else:
            truth_t = None

        # fill the tensors declared above
        nfilled = 0
        for ib, batch in enumerate(data_v):
            srcpix = batch["ADCMasked"][0]

            start = nfilled
            end = nfilled + datalen[ib]
            # columns 0:2 hold (row,col); column 2 of coord_t is the batch index
            coord_t[start:end,0:2] \
                = torch.from_numpy( srcpix[:,0:2].astype(np.int64) )
            coord_t[start:end, 2] = ib
            input_t[start:end, 0] = torch.from_numpy(srcpix[:, 2])

            if has_truth:
                truepix = batch["ADC"][0]
                truth_t[start:end, 0] = torch.from_numpy(truepix[:, 2])

            nfilled += datalen[ib]

        flowdata = {"coord": coord_t, "ADCMasked": input_t, "ADC": truth_t}

        return flowdata
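A minimal sketch of assembling one sparse batch with the class above, assuming the imports used by the snippet (torch, numpy as np, ROOT as rt, torch.utils.data as torchdata, plus LArCVServer and load_cropped_sparse_infill from the source project); the file name and producer labels are hypothetical placeholders:

# hypothetical file and producer names for illustration
dataset = SparseInfillPyTorchDataset("sparseinfill_cropped.root", batchsize=4,
                                     input_producer_name="ADCMasked",
                                     true_producer_name="ADC",
                                     plane=2, nworkers=2)
flowdata = dataset.get_tensor_batch(torch.device("cpu"))
print(flowdata["coord"].shape, flowdata["ADCMasked"].shape)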
Code Example #5
File: test_server.py  Project: LArbys/larcvdataset
    # record each plane's image bounds (min/max x and y) from its meta
    for ip in range(0, 3):
        data["meta"][ip,0,0] = ev_adc.as_vector()[ip].meta().min_x()
        data["meta"][ip,0,1] = ev_adc.as_vector()[ip].meta().min_y()
        data["meta"][ip,0,2] = ev_adc.as_vector()[ip].meta().max_x()
        data["meta"][ip,0,3] = ev_adc.as_vector()[ip].meta().max_y()

    return data
    

if __name__ == "__main__":

    batchsize = 4
    nworkers  = 4
    print "start feeders"
    inputfile = "../testdata/smallsample/larcv_dlcosmictag_5482426_95_smallsample082918.root"
    feeder = LArCVServer(batchsize,"test",load_data,inputfile,nworkers,server_verbosity=0,worker_verbosity=0)

    print "wait for workers to load up"
    twait = 3
    while twait>0:
        time.sleep(0.5)
        twait -= 1
        print "twait: ",twait
    
    print "start receiving"
    nentries = 50
    tstart = time.time()
    for n in xrange(nentries):
        batch = feeder.get_batch_dict()
        print "entry[",n,"] from ",batch["feeder"],": ",batch.keys()
    tend = time.time()-tstart
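The snippet stops after measuring tend; a one-line summary using the variables above could report the throughput:

print("read %d batches in %.2f s (%.2f batches/s)" % (nentries, tend, nentries / tend))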
Code Example #6
def load_ssnet_larcvdata( name, inputfile, batchsize, nworkers, tickbackward=False ):
    """Create a LArCVServer feeder named `name` that serves sparse ssnet data from `inputfile`."""
    feeder = LArCVServer(batchsize, name, load_sparse_ssnetdata, inputfile, nworkers,
                         server_verbosity=2, worker_verbosity=2, io_tickbackward=tickbackward)
    return feeder
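A minimal usage sketch for the factory function above, assuming LArCVServer and load_sparse_ssnetdata are importable as in the source project; the feeder name and file path are hypothetical placeholders:

feeder = load_ssnet_larcvdata("ssnetfeed", "ssnet_sparse.root", batchsize=1, nworkers=2)
batch = feeder.get_batch_dict()
print("batch keys:", list(batch.keys()))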