Example #1
import os, sys
import ROOT as rt
from larcv import larcv
from uresnet import UResNet
from larcvdataset import LArCVDataset

#net = UResNet( num_classes=3, input_channels=1, inplanes=16 )

# load a test batch
#iotest = LArCVDataset("test_dataloader.cfg", "ThreadProcessorTest")
iotest = LArCVDataset("test_threadfiller.cfg", "ThreadProcessorTest")
iotest.start(1)

data = iotest[0]
print data
#print net
iotest.stop()
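
The `data` dict fetched above holds one flat float32 array per product named in the ThreadProcessor config. A minimal sketch of reshaping it into the NCHW layout torch expects, assuming an "image" key and 256x256 pixels as in Example #2 below (run before `iotest.stop()`):

# Assumes data = iotest[0] from above, an "image" key, and 256x256 images;
# adjust the key and shape to your configuration.
img = data["image"]                                 # shape (batchsize, 256*256)
img_nchw = img.reshape((img.shape[0], 1, 256, 256))
print(img_nchw.shape)                               # e.g. (1, 1, 256, 256)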
Example #2
# Helper routines (padandcrop, adjust_learning_rate, train, validate) and the
# global best_prec1 are defined elsewhere in the full script; the import block
# below is an assumed reconstruction of what this excerpt needs.
import traceback
import numpy as np
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import resnet_example
from larcvdataset import LArCVDataset


def main():

    global best_prec1

    # create model: loading resnet18 as defined in torchvision module
    #model = resnet_example.resnet18(pretrained=False, num_classes=5, input_channels=1)
    model = resnet_example.resnet14(pretrained=False,
                                    num_classes=5,
                                    input_channels=1)
    model.cuda()

    print "Loaded model: ", model

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    # training parameters
    lr = 1.0e-3
    momentum = 0.9
    weight_decay = 1.0e-3
    batchsize = 50
    batchsize_valid = 500
    start_epoch = 0
    epochs = 1500
    nbatches_per_epoch = 10000 / batchsize
    nbatches_per_valid = 1000 / batchsize_valid

    optimizer = torch.optim.SGD(model.parameters(),
                                lr,
                                momentum=momentum,
                                weight_decay=weight_decay)

    cudnn.benchmark = True

    # dataset
    iotrain = LArCVDataset("train_dataloader.cfg",
                           "ThreadProcessor",
                           loadallinmem=True)
    iovalid = LArCVDataset("valid_dataloader.cfg", "ThreadProcessorTest")

    iotrain.start(batchsize)
    iovalid.start(batchsize_valid)

    # Resume training option
    if False:
        checkpoint = torch.load("checkpoint.pth.p01.tar")
        best_prec1 = checkpoint["best_prec1"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint['optimizer'])

    if False:
        data = iotrain[0]
        img = data["image"]
        lbl = data["label"]
        img_np = np.zeros((img.shape[0], 1, 256, 256), dtype=np.float32)
        lbl_np = np.zeros((lbl.shape[0]), dtype=np.int)
        for j in range(img.shape[0]):
            imgtemp = img[j].reshape((256, 256))
            print imgtemp.shape
            img_np[j, 0, :, :] = padandcrop(imgtemp)
            lbl_np[j] = np.argmax(lbl[j])

        print "Train label"
        print lbl_np

        datatest = iovalid[0]
        imgtest = datatest["image"]
        print "Test image shape"
        print imgtest.shape

        iotrain.stop()
        iovalid.stop()

        return

    for epoch in range(start_epoch, epochs):

        adjust_learning_rate(optimizer, epoch, lr)
        print "Epoch [%d]: " % (epoch),
        for param_group in optimizer.param_groups:
            print "lr=%.3e" % (param_group['lr']),
        print

        # train for one epoch
        try:
            train_ave_loss, train_ave_acc = train(iotrain, model, criterion,
                                                  optimizer,
                                                  nbatches_per_epoch, epoch,
                                                  50)
        except Exception as e:
            print "Error in training routine!"
            print str(e)
            print e.__class__.__name__
            traceback.print_exc()
            break
        print "Epoch [%d] train aveloss=%.3f aveacc=%.3f" % (
            epoch, train_ave_loss, train_ave_acc)

        # evaluate on validation set
        try:
            prec1 = validate(iovalid, model, criterion, nbatches_per_valid, 1)
        except Exception as e:
            print "Error in validation routine!"
            print str(e)
            print e.__class__.__name__
            traceback.print_exc()
            break
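
`adjust_learning_rate` is called at the top of each epoch but is not part of the excerpt. A minimal sketch of the step-decay schedule this pattern usually pairs with (an assumption, not the author's exact implementation):

def adjust_learning_rate(optimizer, epoch, lr):
    # Assumed step decay: scale the base lr down by 10x every 30 epochs.
    new_lr = lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr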
Example #3
# config globals (GPUMODE, DEVICE, DEVICE_IDS, NCLASSES, RESUME_FROM_CHECKPOINT,
# CHECKPOINT_FILE, etc.) and imports are assumed to be defined at module level
# in the full script, which is not excerpted here
def main():

    global best_prec1
    global writer

    # create model, mark it to run on the GPU
    if GPUMODE:
        model = UResNet(inplanes=32,
                        input_channels=1,
                        num_classes=NCLASSES,
                        showsizes=False)
        model.to(device=torch.device(DEVICE))  # put onto gpuid
    else:
        model = UResNet(inplanes=32, input_channels=1, num_classes=NCLASSES)

    # Resume training option
    if RESUME_FROM_CHECKPOINT:
        print "RESUMING FROM CHECKPOINT FILE ", CHECKPOINT_FILE
        checkpoint = torch.load(
            CHECKPOINT_FILE,
            map_location=CHECKPOINT_MAP_LOCATIONS)  # load weights to gpuid
        best_prec1 = checkpoint["best_prec1"]
        if CHECKPOINT_FROM_DATA_PARALLEL:
            model = nn.DataParallel(
                model, device_ids=DEVICE_IDS)  # distribute across device_ids
        model.load_state_dict(checkpoint["state_dict"])

    if not CHECKPOINT_FROM_DATA_PARALLEL and len(DEVICE_IDS) > 1:
        model = nn.DataParallel(
            model, device_ids=DEVICE_IDS)  # distribute across device_ids

    # dump the model structure
    print "Loaded model: ", model
    # check where model pars are
    #for p in model.parameters():
    #    print p.is_cuda

    # define loss function (criterion) and optimizer
    if GPUMODE:
        criterion = PixelWiseNLLLoss()
        criterion.to(device=torch.device(DEVICE))
    else:
        criterion = PixelWiseNLLLoss()

    # training parameters
    lr = 1.0e-5
    momentum = 0.9
    weight_decay = 1.0e-4

    # training length
    if "cuda" in DEVICE:
        batchsize_train = 4 * len(DEVICE_IDS)
        batchsize_valid = 2 * len(DEVICE_IDS)
    else:
        batchsize_train = 4
        batchsize_valid = 2

    start_epoch = 0
    start_iter = 0  # starting iteration for the main loop below
    epochs = 10
    num_iters = 30000
    iter_per_epoch = None  # determined later
    iter_per_valid = 10
    iter_per_checkpoint = 500

    nbatches_per_itertrain = 20
    itersize_train = batchsize_train * nbatches_per_itertrain
    trainbatches_per_print = 100

    nbatches_per_itervalid = 40
    itersize_valid = batchsize_valid * nbatches_per_itervalid
    validbatches_per_print = 100

    # SETUP OPTIMIZER

    # SGD w/ momentum
    #optimizer = torch.optim.SGD(model.parameters(), lr,
    #                            momentum=momentum,
    #                            weight_decay=weight_decay)

    # ADAM
    # betas default to (0.9, 0.999): smoothing coefficients for the running
    # averages of the gradient and its square
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)

    # optimize algorithms based on input size (good if input size is constant)
    cudnn.benchmark = True

    # LOAD THE DATASET

    iotrain = LArCVDataset(TRAIN_LARCV_CONFIG, "ThreadProcessorTrain")
    iovalid = LArCVDataset(VALID_LARCV_CONFIG, "ThreadProcessorValid")
    iotrain.start(batchsize_train)
    iovalid.start(batchsize_valid)
    iosample = {"valid": iovalid, "train": iotrain}

    NENTRIES = len(iotrain)
    print "Number of entries in training set: ", NENTRIES

    if NENTRIES > 0:
        iter_per_epoch = NENTRIES / itersize_train
        if num_iters is None:
            # set num_iters from the requested number of epochs
            num_iters = (epochs - start_epoch) * NENTRIES
        else:
            epochs = num_iters / NENTRIES
    else:
        iter_per_epoch = 1

    print "Number of epochs: ", epochs
    print "Iter per epoch: ", iter_per_epoch

    if False:
        # for debugging/testing data
        sample = "train"
        print "TEST BATCH: sample=", sample
        adc_t, label_t, weight_t = prep_data(iosample[sample], sample,
                                             batchsize_train, IMAGE_WIDTH,
                                             IMAGE_HEIGHT, ADC_THRESH)
        print "adc shape: ", adc_t.shape
        print "label shape: ", label_t.shape
        print "weight shape: ", weight_t.shape

        # load opencv, to dump png of image
        import cv2 as cv
        cv.imwrite("testout_adc.png", adc_t.numpy()[0, 0, :, :])
        cv.imwrite("testout_label.png", label_t.numpy()[0, :, :])
        cv.imwrite("testout_weight.png", weight_t.numpy()[0, 0, :, :])

        print "STOP FOR DEBUGGING"
        iotrain.stop()
        iovalid.stop()
        sys.exit(-1)

    with torch.autograd.profiler.profile(enabled=RUNPROFILER) as prof:

        # Resume training option
        #if RESUME_FROM_CHECKPOINT:
        #    print "RESUMING FROM CHECKPOINT FILE ",CHECKPOINT_FILE
        #    checkpoint = torch.load( CHECKPOINT_FILE, map_location=CHECKPOINT_MAP_LOCATIONS )
        #    best_prec1 = checkpoint["best_prec1"]
        #    model.load_state_dict(checkpoint["state_dict"])
        #optimizer.load_state_dict(checkpoint['optimizer'])
        #if GPUMODE:
        #    optimizer.cuda(GPUID)

        for ii in range(start_iter, num_iters):

            adjust_learning_rate(optimizer, ii, lr)
            print "MainLoop Iter:%d Epoch:%d.%d " % (ii, ii / iter_per_epoch,
                                                     ii % iter_per_epoch),
            for param_group in optimizer.param_groups:
                print "lr=%.3e" % (param_group['lr']),
            print

            # train for one iteration
            try:
                train_ave_loss, train_ave_acc = train(iotrain, batchsize_train,
                                                      model, criterion,
                                                      optimizer,
                                                      nbatches_per_itertrain,
                                                      ii, NCLASSES,
                                                      trainbatches_per_print)
            except Exception as e:
                print "Error in training routine!"
                print str(e)
                print e.__class__.__name__
                traceback.print_exc()
                break
            print "Train Iter:%d Epoch:%d.%d train aveloss=%.3f aveacc=%.3f" % (
                ii, ii / iter_per_epoch, ii % iter_per_epoch, train_ave_loss,
                train_ave_acc)

            # evaluate on validation set
            if ii % iter_per_valid == 0:
                try:
                    prec1 = validate(iovalid, batchsize_valid, model,
                                     criterion, nbatches_per_itervalid,
                                     validbatches_per_print, ii)
                except Exception as e:
                    print "Error in validation routine!"
                    print str(e)
                    print e.__class__.__name__
                    traceback.print_exc()
                    break

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)

                # check point for best model
                if is_best:
                    print "Saving best model"
                    save_checkpoint(
                        {
                            'iter': ii,
                            'epoch': ii / iter_per_epoch,
                            'state_dict': model.state_dict(),
                            'best_prec1': best_prec1,
                            'optimizer': optimizer.state_dict(),
                        }, is_best, -1)

            # periodic checkpoint
            if ii > 0 and ii % iter_per_checkpoint == 0:
                print "saving periodic checkpoint"
                save_checkpoint(
                    {
                        'iter': ii,
                        'epoch': ii / iter_per_epoch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer': optimizer.state_dict(),
                    }, False, ii)
            # flush the print buffer after iteration
            sys.stdout.flush()
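
`save_checkpoint` is also external to the excerpt. A sketch that matches the state dict and the call signatures used above, following the common `torch.save` pattern (the file naming is an assumption):

import shutil
import torch

def save_checkpoint(state, is_best, n, filename="checkpoint.pth.tar"):
    # n >= 0 marks a periodic checkpoint tagged with the iteration number;
    # n == -1 is the value passed on the "best model" path above.
    if n >= 0:
        filename = "checkpoint.iter%d.pth.tar" % n
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, "model_best.pth.tar")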
Example #4
# imports (torch, cudnn, numpy as np, sys, traceback), the local modules
# network and myfunc, and the train/validate helpers come from the full
# script, which is not excerpted here
def main():

    global best_prec1_vis
    global best_prec1_flow
    global writer
    
    model = network.mymodel(num_classes=1, input_channels=1, showsizes=False)
    model.cuda()
    #print "Loaded model: ",model

    # define loss function (criterion) and optimizer
    criterion1 = myfunc.PixelWiseFlowLoss(minval=4).cuda()
    criterion2 = myfunc.PixelWiseNLLLoss().cuda()
    
    # training parameters
    lmbd = 0.5
    lr = 1.0e-4  # was 1.0e-3
    momentum = 0.9
    weight_decay = 1.0e-3
    batchsize = 8
    batchsize_valid = 8
    start_epoch = 0
    epochs = 50  # was 1500
    if len(sys.argv)>1:
        epochs = int(sys.argv[1])
    print "Number of epochs: ", epochs
    print "Train batch: ", batchsize
    

    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum,
                                weight_decay=weight_decay)

    cudnn.benchmark = True

    # dataset
    #iotrain = LArCVDataset("train_dataloader.cfg", "ThreadProcessor", loadallinmem=True)
    iotrain = LArCVDataset("train_dataloader.cfg", "ThreadProcessor")
    iovalid = LArCVDataset("valid_dataloader.cfg", "ThreadProcessorTest")
    
    iotrain.start(batchsize)
    iovalid.start(batchsize_valid)

    #nbatch per epoch
    NENTRIES = iotrain.io.fetch_n_entries()
    #NENTRIES=0;
    if NENTRIES > 0:
        nbatches_per_epoch = NENTRIES / batchsize
        nbatches_per_valid = NENTRIES / batchsize_valid
    else:
        nbatches_per_epoch = 1
        nbatches_per_valid = 1

    # Resume training option
    if False:
        checkpoint = torch.load( "checkpoint.pth.p01.tar" )
        best_prec1 = checkpoint["best_prec1"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint['optimizer'])
    
    if False: #debug
        data = iotrain[0]
        img  = data["imageY"]
        img2 = data["imageU"]
        lbl  = data["label"]
        vis  = data["match"]
        '''
        img_np  = np.zeros( (img.shape[0],  1, 512, 512), dtype=np.float32 )
        img2_np = np.zeros( (img2.shape[0], 1, 512, 512), dtype=np.float32 )
        lbl_np  = np.zeros( (lbl.shape[0], 1, 512, 512), dtype=np.int )
        vis_np  = np.zeros( (vis.shape[0], 512, 512), dtype=np.int )
        fvis_np  = np.zeros( (vis.shape[0], 1, 512, 512), dtype=np.float32 )

        for j in range(img.shape[0]):
            img_np[j,0,:,:]  = img[j].reshape( (512,512) )
            img2_np[j,0,:,:] = img2[j].reshape( (512,512) )
            lbl_np[j,0,:,:]  = lbl[j].reshape( (512,512) )
            vis_np[j,:,:]    = vis[j].reshape( (512,512) )
            fvis_np[j,0,:,:]  = vis[j].reshape( (512,512) ) 
        '''
        img_np  = np.zeros( ( 512, 512), dtype=np.float32 )
        img2_np = np.zeros( ( 512, 512), dtype=np.float32 )
        lbl_np  = np.zeros( ( 512, 512), dtype=np.int )
        vis_np  = np.zeros( ( 512, 512), dtype=np.int )
        fvis_np = np.zeros( ( 512, 512), dtype=np.float32 )

        for j in range(1):  # originally: range(img.shape[0])
            img_np[:,:]  = img[j].reshape( (512,512) )
            img2_np[:,:] = img2[j].reshape( (512,512) )
            lbl_np[:,:]  = lbl[j].reshape( (512,512) )
            vis_np[:,:]  = vis[j].reshape( (512,512) )
            fvis_np[:,:] = vis[j].reshape( (512,512) ) 

        tar_x_visi = np.multiply(lbl_np, fvis_np)
        abs_tar_x_visi = np.fabs(tar_x_visi)
        thresh = abs_tar_x_visi > 0
        threshint = thresh.astype(int)
        
        datatest = iovalid[0]
        imgtest = datatest["imageYtest"]
        print "Test image shape"
        print imgtest.shape

        cv.imwrite( "testout_srcY.png", img_np  )
        cv.imwrite( "testout_srcU.png", img2_np  )
        cv.imwrite( "testout_tar.png", lbl_np  )
        cv.imwrite( "testout_vis.png", fvis_np*100  )
        cv.imwrite( "testout_tarXvis.png", tar_x_visi  )
        cv.imwrite( "testout_abs_tarXvis.png", abs_tar_x_visi*100  )
        cv.imwrite( "testout_thresh_tarXvis.png", threshint*100  )
        
        iotrain.stop()
        iovalid.stop()
        
        return


    #data = iotrain[0]
    #data2 = iovalid[0]
    for epoch in range(start_epoch, epochs):

        myfunc.adjust_learning_rate(optimizer, epoch, lr)
        print "Epoch [%d]: "%(epoch),
        for param_group in optimizer.param_groups:
            print "lr=%.3e"%(param_group['lr']),
        print

        # train for one epoch
        try:
            train_ave_loss, train_ave_acc_vis, train_ave_acc_flow = train(
                iotrain, model, criterion1, criterion2, lmbd, optimizer,
                nbatches_per_epoch, epoch, 100)
            #train_ave_loss, train_ave_acc_vis, train_ave_acc_flow = train(data, model, criterion1, criterion2, lmbd, optimizer, nbatches_per_epoch, epoch, 50)
        except Exception as e:
            print "Error in training routine!"
            print str(e)
            print e.__class__.__name__
            traceback.print_exc()
            break
        print "Epoch [%d] train aveloss=%.3f aveacc_vis=%.3f aveacc_flow=%.3f"%(epoch,train_ave_loss,train_ave_acc_vis,train_ave_acc_flow)

        # evaluate on validation set
        try:
            prec1_vis, prec1_flow = validate(iovalid, model, criterion1,
                                             criterion2, lmbd,
                                             nbatches_per_valid, epoch, 100)
            #prec1_vis, prec1_flow = validate(data2, model, criterion1, criterion2, lmbd, nbatches_per_valid, epoch, 50)
        except Exception as e:
            print "Error in validation routine!"
            print str(e)
            print e.__class__.__name__
            traceback.print_exc()
            break
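
The `train` and `validate` routines receive both criteria plus the weight `lmbd`, so presumably each batch's total loss blends the flow and visibility terms. A hypothetical single step under that assumption (the two-headed model output and the blend are guesses; the real routines are not shown):

def train_step(model, criterion1, criterion2, lmbd, optimizer,
               img, flow_target, vis_target):
    # Hypothetical: assumes the model returns (flow, visibility) predictions
    # and that the losses mix as lmbd*flow + (1 - lmbd)*visibility.
    optimizer.zero_grad()
    flow_pred, vis_pred = model(img)
    loss = lmbd * criterion1(flow_pred, flow_target) \
           + (1.0 - lmbd) * criterion2(vis_pred, vis_target)
    loss.backward()
    optimizer.step()
    return loss.item()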