Example #1
def test_epoch(model,
               test_loader,
               result_collector,
               device=torch.device('cuda'),
               dtype=torch.float):
    model.eval()

    with torch.no_grad():
        for batch_idx, (inputs, extra) in enumerate(test_loader):
            outputs = model(inputs.to(device, dtype))
            result_collector((inputs, outputs, extra))

            progress_bar(batch_idx, len(test_loader))
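
A minimal wiring sketch for `test_epoch`, using a toy model and dataset as stand-ins for the project's network, loader and result collector (it assumes the project's `progress_bar` console helper is importable; every other name below is illustrative only):

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# toy stand-ins, for illustration only
toy_model = nn.Linear(8, 3)
toy_loader = DataLoader(TensorDataset(torch.randn(32, 8), torch.arange(32)),
                        batch_size=8, shuffle=False)

collected = []

def collect(batch):
    # receives (inputs, outputs, extra) exactly as test_epoch passes it
    inputs, outputs, extra = batch
    collected.append(outputs.cpu())

test_epoch(toy_model, toy_loader, collect, device=torch.device('cpu'))
print(torch.cat(collected).shape)  # torch.Size([32, 3])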
Example #2
def val_epoch(model,
              criterion,
              val_loader,
              device=torch.device('cuda'),
              dtype=torch.float):
    model.eval()
    val_loss = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            inputs, targets = inputs.to(device,
                                        dtype), targets.to(device, dtype)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            val_loss += loss.item()
            progress_bar(batch_idx, len(val_loader),
                         'Loss: {0:.4e}'.format(val_loss / (batch_idx + 1)))
Example #3
def train_epoch(model,
                criterion,
                optimizer,
                train_loader,
                device=torch.device('cuda'),
                dtype=torch.float):
    model.train()
    train_loss = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device, dtype), targets.to(device, dtype)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        progress_bar(batch_idx, len(train_loader),
                     'Loss: {0:.4e}'.format(train_loss / (batch_idx + 1)))
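
`train_epoch` and `val_epoch` are meant to be driven together, once per epoch. The loop below mirrors how Example 7 (main.py) calls them, with the same criterion and optimizer settings; `net`, `train_loader`, `val_loader`, `device`, `dtype`, `START_EPOCH` and `NUM_EPOCHS` are assumed to be set up as in that example:

import torch.nn as nn
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0)

for epoch in range(START_EPOCH, NUM_EPOCHS):
    print('Epoch: {}'.format(epoch))
    train_epoch(net, criterion, optimizer, train_loader, device=device, dtype=dtype)
    val_epoch(net, criterion, val_loader, device=device, dtype=dtype)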
Example #4
File: solver.py  Project: hsed/fyp
def test_epoch_dropout(model,
                       test_loader,
                       result_collector,
                       device=torch.device('cuda'),
                       dtype=torch.float,
                       output_rate=10):
    '''
    Use dropout to produce `output_rate` outputs for each input, then use their mean
    as the output. We could additionally keep and report the individual outcomes.

    Important: please use batch_size=1 for the data loader!

    `output_rate` => number of evaluations (outputs) per input sample
    '''
    ## special case: here we keep dropout active (via model.train()) and ideally use the mean value;
    ## all dropouts are used, but in future we may want to use only one dropout layer
    model.train()

    with torch.no_grad():
        for batch_idx, (inputs, targets, extra) in enumerate(test_loader):
            assert targets.shape[0] == 1  # only works currently for batch_size=1
            output_set = torch.cat([
                model.forward_eval(inputs.to(device, dtype))
                for _ in range(output_rate)
            ])

            #print("Set shape: ", output_set.shape)

            ## take the mean along the repeated-sample axis (dim 0)
            output_mean = torch.mean(output_set, dim=0, keepdim=True)

            #print("Mean Shape: ", output_mean.shape)
            #quit()

            # result collector will convert y ->
            result_collector((inputs, output_mean, targets, extra))

            progress_bar(batch_idx, len(test_loader))
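
Each input is pushed through the network `output_rate` times with dropout left active (hence `model.train()` under `torch.no_grad()`), and the mean of those passes is reported. Because of the batch-size assert above, the loader must use `batch_size=1`. A wiring sketch matching the dropout branch of Example 7, where `net`, `test_set` and `test_res_collector` are constructed:

import torch

# batch_size must be 1 for test_epoch_dropout
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1,
                                          shuffle=False, num_workers=4)
test_epoch_dropout(net, test_loader, test_res_collector,
                   device=device, dtype=dtype, output_rate=10)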
Example #5
File: solver.py  Project: hsed/fyp
def test_epoch(model,
               test_loader,
               result_collector,
               device=torch.device('cuda'),
               dtype=torch.float):
    model.eval()

    with torch.no_grad():
        for batch_idx, (inputs, targets, extra) in enumerate(test_loader):
            ## test_loader should have a final transformer which returns:
            ## `y_gt_mm_not_centered`  i.e. straight from the dataset
            ## `x_cropped_scaled`      i.e. no aug, but cropped based on CoM (from the dataset)
            ##                         and scaled to the model requirements, i.e. 128x128
            ## `CoM point`             <-- used to recover y_pred_mm_not_centered
            ## (y_pred_std_centered is in mm but standardised, i.e. values in [-1, 1])
            ## we need the special forward_eval function to also pass through the last
            ## pca layer and get back 1x63 instead of 1x30
            outputs = model.forward_eval(inputs.to(device, dtype))

            # result collector will convert y ->
            result_collector((inputs, outputs, targets, extra))

            progress_bar(batch_idx, len(test_loader))
Example #6
def main():
    config_data = config.EvalExConfig()
    args = parse_arguments()
    # create analysis files for all vcf and json pairs (1KG)
    if args.createfiles:
        # clear directories
        exomizerhelper.resetdir(config_data['directories']['analysisfiles'],
                                config_data['directories']['backup'])
        exomizerhelper.resetdir(config_data['directories']['results'],
                                config_data['directories']['backup'])

        vcffiles = []
        # create analysis files for each vcf file
        for file in os.listdir(config_data['directories']['vcf']):
            vcffiles.append(file)
        case_objs = progressbar.progress_bar("Create analysis files")(
            lambda file: createanafile(file, config_data))(vcffiles)

    # execute exomizer on batch file
    if args.exomizer:
        # create analysis batch file of all analysisfiles
        batchfile = exomizerhelper.createbatchanalysis(
            config_data['directories']['analysisfiles'])
        FNULL = open("exomizer.log", 'w')
        print("Executing Exomizer...")
        subprocess.run([
            "java", "-Xms4g", "-Xmx8g", "-jar",
            config_data['exomizer']['exomizerdir'] + ".jar",
            "--analysis-batch", batchfile
        ],
                       cwd=config_data['exomizer']['exomizerdir'])
        print("Exomizer finished")

    # analyze results
    if args.ranks:
        print("Getting rank data")
        ranks = []
        for dir in os.listdir(config_data['directories']['results']):
            if (dir != ".gitignore" and not dir.endswith(".zip")):
                ranks.append(
                    exomizerhelper.getrank(
                        dir, config_data['directories']['json'],
                        config_data['directories']['results']))

        # print ranks as percentages
        exomizerhelper.outputranks(ranks)
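
The curried call `progressbar.progress_bar("Create analysis files")(func)(vcffiles)` used above takes a label, then a per-item function, then the list to process, and maps the function over the list while showing progress. A minimal sketch of what such a helper could look like; this is an assumption about its shape, not the project's actual implementation:

def progress_bar(label):
    """Hypothetical sketch: progress_bar(label)(func)(items) maps func over items."""
    def wrap(func):
        def run(items):
            results = []
            total = len(items)
            for i, item in enumerate(items, start=1):
                results.append(func(item))
                print('\r{}: {}/{}'.format(label, i, total), end='', flush=True)
            print()  # finish the progress line
            return results
        return run
    return wrap

# e.g. squares = progress_bar("squaring")(lambda x: x * x)([1, 2, 3])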
Example #7
def main():
    #######################################################################################
    ## Configurations
    #print('Warning: disable cudnn for batchnorm first, or just use only cuda instead!')
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)

    args = parse_args()

    device = torch.device('cuda:%d' % args.device_id) if torch.cuda.is_available() \
                else torch.device('cpu')
    dtype = torch.float

    resume_train = args.resume >= 0
    resume_after_epoch = args.resume
    save_checkpoint = True
    checkpoint_dir = r'checkpoint'

    # FROZEN_STATE
    # True  => don't train; just load the model from a checkpoint and test
    # False => train normally (resuming from a checkpoint or not, as configured below)
    FROZEN_STATE = (args.checkpoint is not None)

    EPOCHS_PER_CHECKPOINT = 5  # 5

    START_EPOCH = 0
    NUM_EPOCHS = args.epochs  #3#1#2#1

    # Data
    DATA_DIR = r'datasets/MSRA15'
    CENTER_DIR = r'datasets/MSRA15_CenterofMassPts'

    NUM_KEYPOINTS = 21
    TEST_SUBJ_ID = args.test_subj_id  #3 ##changed for now
    PCA_COMP = 30
    IMGSZ_PX = 128
    CROPSZ_MM = 200
    DEBUG_MODE = False

    AUG_MODES = [AugType.AUG_ROT, AugType.AUG_TRANS,
                 AugType.AUG_NONE]  #, AugType.AUG_SC, AugType.AUG_TRANS
    PCA_AUG_MODES = [AugType.AUG_ROT, AugType.AUG_TRANS, AugType.AUG_NONE]  #

    ### TODO: if refined_com, save/load the pca with a different name!
    if args.reduced_dataset: print("Info: Using reduced dataset for training.")
    if not args.refined_com:
        print("Info: Using GT CoM references for training.")

    print("Info: AUG_MODES: ", [aug.name for aug in AUG_MODES])
    print("Info: PCA_AUG_MODES: ", [aug.name for aug in PCA_AUG_MODES])
    print("Info: TEST_SUBJ_ID: ", TEST_SUBJ_ID)

    ### common kwargs for MSRADataset
    MSRA_KWARGS = {
        'reduce': args.reduced_dataset,
        'use_refined_com': args.refined_com
    }

    ### fix for linux filesystem
    torch.multiprocessing.set_sharing_strategy('file_system')

    ######################################################################################
    ## Transforms
    # use the default crop size of 200mm
    # use overwrite_cache=True when you want to force-learn a new PCA matrix
    # by default we've been using the cpu device; for PCA the cpu is actually fine,
    # and the svd needs a lot of memory (~100GB or so), so cpu it is.
    transform_pca = PCATransform(n_components=PCA_COMP,
                                 use_cache=True,
                                 overwrite_cache=args.force_pca)

    transform_train = DeepPriorXYTransform(depthmap_px=IMGSZ_PX,
                                           crop_len_mm=CROPSZ_MM,
                                           aug_mode_lst=AUG_MODES,
                                           debug_mode=DEBUG_MODE)

    # equivalent to the train transformer, except without augmentation
    transform_val = DeepPriorXYTransform(depthmap_px=IMGSZ_PX,
                                         crop_len_mm=CROPSZ_MM,
                                         aug_mode_lst=[AugType.AUG_NONE])

    # must ensure NO augmentation for test, as the inverter can't handle it
    transform_test = DeepPriorXYTestTransform(depthmap_px=IMGSZ_PX,
                                              crop_len_mm=CROPSZ_MM,
                                              aug_mode_lst=[AugType.AUG_NONE])

    # used for pca_calc
    # New: we can now augment data even for the PCA calculation
    # make sure to supply PCA_AUG_MODES, not AUG_MODES
    transform_y = DeepPriorYTransform(depthmap_px=IMGSZ_PX,
                                      crop_len_mm=CROPSZ_MM,
                                      aug_mode_lst=PCA_AUG_MODES)

    ## used at test time
    transform_output = DeepPriorYTestInverseTransform(crop_len_mm=CROPSZ_MM)

    #######################################################################################
    ## PCA
    # if pca_data wasn't cached we must load all y_data and call the fit function
    y_pca_len = int(2e5)  # number of y samples used to fit the PCA (also reported in the final config print)
    if transform_pca.transform_matrix_np is None:
        # each sample is 1x21x3, so we use cat to make it 3997x21x3
        # (if we used stack it would introduce a new dim, i.e. 3997x1x21x3)
        # load all y_samples into a torch array
        # note: only train subjects are loaded!
        y_set = MARAHandDataset(DATA_DIR, CENTER_DIR, 'train', TEST_SUBJ_ID,
                                transform_y, **MSRA_KWARGS)

        y_idx_pca = np.random.choice(len(y_set), y_pca_len, replace=True)
        #print(y_idx_pca, y_idx_pca.shape)
        #y_loader = torch.utils.data.DataLoader(y_set, batch_size=1, shuffle=True, num_workers=0)
        print('==> Collating %d y_samples for PCA ..' % y_pca_len)

        fullYList = []
        for (i, item) in enumerate(y_idx_pca):  #y_loader
            fullYList.append(y_set[item])
            progress_bar(i, y_pca_len)  #y_loader

        y_train_samples = torch.from_numpy(
            np.stack(fullYList))  #tuple(y_loader) #torch.cat()
        #print(fullList)
        print("\nY_GT_STD SHAPE: ", y_train_samples.shape, "Max: ",
              np.max(y_train_samples.numpy()), "Min: ",
              np.min(y_train_samples.numpy()), "\n")
        # in future just use fit command, fit_transform is just for testing
        print('==> fitting PCA ..')
        zz = transform_pca.fit_transform(y_train_samples)
        print("PCA_Y_SHAPE: ", zz.shape, "MAX: ", zz.max(), "MIN: ", zz.min(),
              "\n")
        print('==> PCA fitted ..')

        del y_train_samples
        del fullYList
        #del y_loader
        del y_set

    # print("PCA Matrix, Vector: \t",
    #         transform_pca.transform_matrix_np.shape,
    #         "\t",transform_pca.mean_vect_np.shape)

    ## note: transposed, this matrix is a (semi-)orthogonal U matrix, i.e. its
    ## inverse is its transpose, so we can invert the transformation; to use it
    ## as the last layer we supply transform_matx.T as the weights
    ## (the bias / mean vector is supplied as-is but may need reshaping).
    ## a small round-trip sketch of this inversion follows at the end of this example.

    #######################################################################################
    ## Data, transform, dataset and loader

    print('==> Preparing data ..')
    ## the dataset returns np array samples; the dataloader returns torch samples (and shuffles them)
    train_set = MARAHandDataset(
        DATA_DIR, CENTER_DIR, 'train', TEST_SUBJ_ID,
        transforms.Compose([transform_train, transform_pca]), **MSRA_KWARGS)
    ## collate_fn could be set here to convert to pca
    ## there was a problem here: the transformers couldn't be parallelized, so
    ## num_workers=4 gave NaN results while num_workers=0 was fine --
    ## probably the pca transform couldn't live inside each worker process.
    ## composing the transformers didn't help either.
    ## this is now fixed by using shared-memory types from the multiprocessing module,
    ## so we run at 100% cpu while training
    # batch size went from 1 ---> 128 (128 is from deep_prior_pp)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=128,
                                               shuffle=True,
                                               num_workers=4)

    # no separate validation dataset; we just use the test dataset instead.
    # using test_subj_id for validation is data snooping, so if we do any
    # hyperparameter search on this val set it would be misleading to report
    # test error on the same set; we'll fix this afterwards...
    # for validation the batch size doesn't matter as we don't train, only calc error
    val_set = MARAHandDataset(
        DATA_DIR, CENTER_DIR, 'test', TEST_SUBJ_ID,
        transforms.Compose([transform_val, transform_pca]), **MSRA_KWARGS)

    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=512,
                                             shuffle=False,
                                             num_workers=4)  #6
    # print("ValSetSz: ", len(val_set))

    ### debugging
    # xTmp, yTmp = train_set[0]
    # xTmp, yTmp = train_set[1]
    # xTmp, yTmp = train_set[2]
    # quit()
    #print("Y_TMP", yTmp.shape)
    #print(yTmp)
    #quit()
    # we see a time difference of 11s vs 14s for 4 workers vs 0 workers
    # import time
    # t = time.time()
    # a = []
    # for batch_idx, (inputs, targets) in enumerate(train_loader):
    #    a.append(targets.clone())

    # c = torch.cat(a)
    # print("\n\ntook: ", time.time()-t, "s newsamples SHAPE: ", c.shape, "Max: ", np.max(c.numpy()), "Min: ", np.min(c.numpy()))
    # quit()

    #######################################################################################
    ## Model, criterion and optimizer
    # the weight matrix is transform_matx.T, as it's the inverse transform
    print('==> Constructing model ..')
    net = DeepPriorPPModel(input_channels=1,
                           num_joints=NUM_KEYPOINTS,
                           num_dims=3,
                           pca_components=PCA_COMP,
                           dropout_prob=0.3,
                           train_mode=True,
                           weight_matx_np=transform_pca.transform_matrix_np.T,
                           bias_matx_np=transform_pca.mean_vect_np)

    net = net.to(device, dtype)
    if device.type == 'cuda':
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True
        print('cudnn.enabled: ', torch.backends.cudnn.enabled)

    ## could make this better, e.g. Huber loss?
    ## the deep-prior-pp code just uses MSELoss
    criterion = nn.MSELoss()

    ## params from deep-prior-pp
    optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0)
    #optimizer = optim.RMSprop(net.parameters(), lr=2.5e-4)

    #######################################################################################
    ## Resume / FROZEN_STATE
    if FROZEN_STATE:
        # if frozen state is being loaded, don't do training!
        checkpoint_file = args.checkpoint  # load directly from cmd_line
        print('==> Loading frozen state from checkpoint file {} ..'.format(
            os.path.basename(checkpoint_file)))
        checkpoint = torch.load(checkpoint_file)
        net.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    else:
        if resume_train:
            # Load checkpoint
            epoch = resume_after_epoch
            checkpoint_file = os.path.join(checkpoint_dir,
                                           'epoch' + str(epoch) + '.pth')

            print('==> Resuming from checkpoint after epoch id {} ..'.format(
                epoch))
            assert os.path.isdir(
                checkpoint_dir), 'Error: no checkpoint directory found!'
            assert os.path.isfile(
                checkpoint_file
            ), 'Error: no checkpoint file of epoch {}'.format(epoch)

            checkpoint = torch.load(
                os.path.join(checkpoint_dir, 'epoch' + str(epoch) + '.pth'))
            net.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            START_EPOCH = checkpoint['epoch'] + 1

        #######################################################################################
        ## Train and Validate
        print('==> Training ..')
        train_time = time.time()
        # the training procedure is set up so that if loaded epoch_id + 1 == NUM_EPOCHS we don't train at all,
        # i.e. if we are to train 1 epoch and epoch_0 was loaded, there's no need to train further
        for epoch in range(START_EPOCH, NUM_EPOCHS):
            print('Epoch: {}'.format(epoch))
            train_epoch(net,
                        criterion,
                        optimizer,
                        train_loader,
                        device=device,
                        dtype=dtype)
            val_epoch(net, criterion, val_loader, device=device, dtype=dtype)

            # if ep_per_chkpt = 5, save as ep_id: 4, 9, 14, 19, 24, 29
            if save_checkpoint and (epoch + 1) % (EPOCHS_PER_CHECKPOINT) == 0:
                if not os.path.exists(checkpoint_dir): os.mkdir(checkpoint_dir)
                checkpoint_file = os.path.join(checkpoint_dir,
                                               'epoch' + str(epoch) + '.pth')
                checkpoint = {
                    'model_state_dict': net.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch': epoch
                }
                torch.save(checkpoint, checkpoint_file)

        print("Training took: ", format_time(time.time() - train_time), '\n')
    #######################################################################################
    ## Test
    print('==> Testing ..')

    # currently our test_set === val_set !! TODO: change this
    # print('Test on test dataset ..')
    test_set = MARAHandDataset(DATA_DIR,
                               CENTER_DIR,
                               'test',
                               TEST_SUBJ_ID,
                               transform=transform_test,
                               **MSRA_KWARGS)

    ## increase batch size and workers for faster (parallel) calculation in future
    ## forget batch_size for now, as the addition of com_batch doesn't work properly:
    ## it's (500,21,3) + (500,3), so we'd need np.repeat to make com (500,3) -> (500,21,3)
    ## ensure the error is the same regardless of batch size!! --> correct up to 1e-6
    dropout = False

    if dropout:
        test_loader = \
            torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=4)
    else:
        test_loader = \
            torch.utils.data.DataLoader(test_set, batch_size=512, shuffle=False, num_workers=4)
    test_res_collector = DeepPriorBatchResultCollector(test_loader,
                                                       transform_output,
                                                       len(test_set))

    if not dropout:
        test_epoch(net, test_loader, test_res_collector, device, dtype)
    else:
        test_epoch_dropout(net, test_loader, test_res_collector, device, dtype)
    #keypoints_test = test_res_collector.get_result()
    # save_keypoints('./test_res.txt', keypoints_test)

    # print('Fit on train dataset ..')
    # fit_set = MARAHandDataset(DATA_DIR, CENTER_DIR, 'train', TEST_SUBJ_ID, transform_test)
    # fit_loader = torch.utils.data.DataLoader(fit_set, batch_size=1, shuffle=False, num_workers=1)
    # fit_res_collector = BatchResultCollector(fit_loader, transform_output)

    # test_epoch(net, fit_loader, fit_res_collector, device, dtype)
    # keypoints_fit = fit_res_collector.get_result()
    # save_keypoints('./fit_res.txt', keypoints_fit)

    print("\nFINAL_AVG_3D_ERROR: %0.4fmm" %
          test_res_collector.calc_avg_3D_error())

    print("With Config:", "{GT_CoM: %s, Aug: %s, PCA_Aug: %s,\nFull_Dataset: %s, PCA_SZ: %d}" % \
                (not args.refined_com, [aug.name for aug in AUG_MODES],
                 [aug.name for aug in PCA_AUG_MODES], not args.reduced_dataset, y_pca_len))

    if args.save_eval and not args.refined_com:
        ### new saving results and plots ###
        print('\n==> Saving ..')
        pred_fname = 'eval/MSRA15/eval_test_%d_ahpe_gt_com.txt' % TEST_SUBJ_ID
        plot_fname = 'eval/MSRA15/msra_test_%d_joints_acc_gt_com.png' % TEST_SUBJ_ID

        #test_res_collector.get_ahpe_result('eval/MSRA15/msra_test_list.txt', TEST_SUBJ_ID, DATA_DIR)

        saveKeypoints(pred_fname, test_res_collector.get_result())
        print("Keypoints saved to %s..." % pred_fname)

        names = ['joint_' + str(i + 1) for i in range(NUM_KEYPOINTS)]
        dist, acc = test_res_collector.compute_dist_acc_wrapper(max_dist=100,
                                                                num=100)
        fig, ax = plt.subplots()
        plot_acc(ax, dist, acc, names)
        ax.grid(which='both')
        fig.savefig(plot_fname)
        #plt.show()
        print("Plot saved to %s..." % plot_fname)
    elif args.save_eval:
        print(
            "Warning: Cannot save file as it's incompatible with AHPE when using refined_com"
        )

    print('\nAll done ..')
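
On the PCA inversion mentioned in the comments of main(): the 30x63 component matrix has orthonormal rows, so its transpose undoes the encoding, which is why `transform_pca.transform_matrix_np.T` and the mean vector are baked into the network's fixed last layer. A small round-trip sketch, using scikit-learn's PCA in place of the project's `PCATransform` (an assumption for illustration):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
Y = rng.standard_normal((1000, 63))      # stand-in for flattened 21x3 hand poses

pca = PCA(n_components=30).fit(Y)
W, mean = pca.components_, pca.mean_     # W: (30, 63), mean: (63,)

z = (Y - mean) @ W.T                     # encode: the 30-dim code the network predicts
Y_rec = z @ W + mean                     # decode: what the fixed last layer (weight = W.T) computes

print(np.allclose(W @ W.T, np.eye(30)))  # True: rows of W are orthonormal
print(np.abs(Y - Y_rec).max())           # residual from dropping the remaining 33 components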