# Shared imports assumed by the examples below; project-specific modules and
# helpers (my_losses, trainer, visualize, conv, converter, camera,
# synthhands_handler, print_verbose, display_est_time_loop, ...) come from
# the original repository and are not reproduced here.
import time

import numpy as np
from torch.autograd import Variable

DEBUG_VISUALLY = False  # module-level flag read by validate() (assumed default)


def train(train_loader, model, optimizer, train_vars, control_vars, verbose=True):
    curr_epoch_iter = 1
    for batch_idx, (data, target) in enumerate(train_loader):
        control_vars['batch_idx'] = batch_idx
        if batch_idx < control_vars['iter_size']:
            print_verbose("\rPerforming first iteration; current mini-batch: " +
                  str(batch_idx+1) + "/" + str(control_vars['iter_size']), verbose, n_tabs=0, erase_line=True)
        # check if arrived at iter to start
        if control_vars['curr_epoch_iter'] < control_vars['start_iter_mod']:
            if batch_idx % control_vars['iter_size'] == 0:
                print_verbose("\rGoing through iterations to arrive at last one saved... " +
                      str(int(control_vars['curr_epoch_iter']*100.0/control_vars['start_iter_mod'])) + "% of " +
                      str(control_vars['start_iter_mod']) + " iterations (" +
                      str(control_vars['curr_epoch_iter']) + "/" + str(control_vars['start_iter_mod']) + ")",
                              verbose, n_tabs=0, erase_line=True)
                control_vars['curr_epoch_iter'] += 1
                control_vars['curr_iter'] += 1
                curr_epoch_iter += 1
            continue
        # save checkpoint after final iteration
        if control_vars['curr_iter'] == control_vars['num_iter']:
            print_verbose("\nReached final number of iterations: " + str(control_vars['num_iter']), verbose)
            print_verbose("\tSaving final model checkpoint...", verbose)
            final_model_dict = {
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'control_vars': control_vars,
                'train_vars': train_vars,
            }
            trainer.save_checkpoint(final_model_dict,
                            filename=train_vars['checkpoint_filenamebase'] +
                                     'final' + str(control_vars['num_iter']) + '.pth.tar')
            control_vars['done_training'] = True
            break
        # start time counter
        start = time.time()
        # get data and target as cuda variables
        target_heatmaps, target_joints, _, target_prior = target
        data, target_heatmaps, target_prior = Variable(data), Variable(target_heatmaps), Variable(target_prior)
        if train_vars['use_cuda']:
            data = data.cuda()
            target_heatmaps = target_heatmaps.cuda()
            target_prior = target_prior.cuda()
        # visualize if debugging
        # get model output
        output = model(data)
        # accumulate loss for sub-mini-batch
        if train_vars['cross_entropy']:
            loss_func = my_losses.cross_entropy_loss_p_logq
        else:
            loss_func = my_losses.euclidean_loss
        loss, loss_prior = my_losses.calculate_loss_HALNet_prior(loss_func,
            output, target_heatmaps, target_prior, model.joint_ixs, model.WEIGHT_LOSS_INTERMED1,
            model.WEIGHT_LOSS_INTERMED2, model.WEIGHT_LOSS_INTERMED3,
            model.WEIGHT_LOSS_MAIN, control_vars['iter_size'])
        loss.backward()
        train_vars['total_loss'] += loss.item()
        train_vars['total_loss_prior'] += loss_prior.item()
        # accumulate pixel dist loss for sub-mini-batch
        train_vars['total_pixel_loss'] = my_losses.accumulate_pixel_dist_loss_multiple(
            train_vars['total_pixel_loss'], output[3], target_heatmaps, control_vars['batch_size'])
        if train_vars['cross_entropy']:
            train_vars['total_pixel_loss_sample'] = my_losses.accumulate_pixel_dist_loss_from_sample_multiple(
                train_vars['total_pixel_loss_sample'], output[3], target_heatmaps, control_vars['batch_size'])
        else:
            train_vars['total_pixel_loss_sample'] = [-1] * len(model.joint_ixs)
        # get boolean variable stating whether a mini-batch has been completed
        minibatch_completed = (batch_idx+1) % control_vars['iter_size'] == 0
        if minibatch_completed:
            # optimise for mini-batch
            optimizer.step()
            # clear optimiser
            optimizer.zero_grad()
            # append total loss
            train_vars['losses'].append(train_vars['total_loss'])
            # erase total loss
            total_loss = train_vars['total_loss']
            train_vars['total_loss'] = 0
            # append total loss prior
            train_vars['losses_prior'].append(train_vars['total_loss_prior'])
            # erase total loss prior
            total_loss_prior = train_vars['total_loss_prior']
            train_vars['total_loss_prior'] = 0
            # append dist loss
            train_vars['pixel_losses'].append(train_vars['total_pixel_loss'])
            # erase pixel dist loss
            train_vars['total_pixel_loss'] = [0] * len(model.joint_ixs)
            # append dist loss of sample from output
            train_vars['pixel_losses_sample'].append(train_vars['total_pixel_loss_sample'])
            # erase dist loss of sample from output
            train_vars['total_pixel_loss_sample'] = [0] * len(model.joint_ixs)
            # check if loss is better
            if train_vars['losses'][-1] < train_vars['best_loss']:
                train_vars['best_loss'] = train_vars['losses'][-1]
                print_verbose("  This is a best loss found so far: " + str(train_vars['losses'][-1]), verbose)
                train_vars['best_model_dict'] = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'control_vars': control_vars,
                    'train_vars': train_vars,
                }
            if train_vars['losses_prior'][-1] < train_vars['best_loss_prior']:
                train_vars['best_loss_prior'] = train_vars['losses_prior'][-1]
            # log checkpoint
            if control_vars['curr_iter'] % control_vars['log_interval'] == 0:
                trainer.print_log_info(model, optimizer, epoch, total_loss, train_vars, control_vars)  # 'epoch' is defined in the enclosing scope
                msg = ''
                msg += print_verbose(
                    "-------------------------------------------------------------------------------------------",
                    verbose) + "\n"
                msg += print_verbose("Current loss (prior): " + str(total_loss_prior), verbose) + "\n"
                msg += print_verbose("Best loss (prior): " + str(train_vars['best_loss_prior']), verbose) + "\n"
                msg += print_verbose("Mean total loss (prior): " + str(np.mean(train_vars['losses_prior'])), verbose) + "\n"
                msg += print_verbose("Mean loss (prior) for last " + str(control_vars['log_interval']) +
                                     " iterations (average total loss): " + str(
                    np.mean(train_vars['losses_prior'][-control_vars['log_interval']:])), verbose) + "\n"
                msg += print_verbose(
                    "-------------------------------------------------------------------------------------------",
                    verbose) + "\n"
                if control_vars['output_filepath'] != '':
                    with open(control_vars['output_filepath'], 'a') as f:
                        f.write(msg + '\n')

            if control_vars['curr_iter'] % control_vars['log_interval_valid'] == 0:
                print_verbose("\nSaving model and checkpoint model for validation", verbose)
                checkpoint_model_dict = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'control_vars': control_vars,
                    'train_vars': train_vars,
                }
                trainer.save_checkpoint(checkpoint_model_dict,
                                        filename=train_vars['checkpoint_filenamebase'] + 'for_valid_' +
                                                 str(control_vars['curr_iter']) + '.pth.tar')

            # print time lapse
            prefix = 'Training (Epoch #' + str(epoch) + ' ' + str(control_vars['curr_epoch_iter']) + '/' +\
                     str(control_vars['tot_iter']) + ')' + ', (Batch ' + str(control_vars['batch_idx']+1) +\
                     '(' + str(control_vars['iter_size']) + ')' + '/' +\
                     str(control_vars['num_batches']) + ')' + ', (Iter #' + str(control_vars['curr_iter']) +\
                     '(' + str(control_vars['batch_size']) + ')' +\
                     ' - log every ' + str(control_vars['log_interval']) + ' iter): '
            control_vars['tot_toc'] = display_est_time_loop(control_vars['tot_toc'] + time.time() - start,
                                                            control_vars['curr_iter'], control_vars['num_iter'],
                                                            prefix=prefix)

            control_vars['curr_iter'] += 1
            control_vars['start_iter'] = control_vars['curr_iter'] + 1
            control_vars['curr_epoch_iter'] += 1


    return train_vars, control_vars
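
The iter_size bookkeeping above implements gradient accumulation: each loader batch is a sub-mini-batch, the loss helpers scale by iter_size, and the optimiser steps only once every iter_size sub-batches. A minimal self-contained sketch of that pattern (names below are illustrative, not from the example):

def accumulate_and_step(model, optimizer, loader, loss_fn, iter_size):
    # One optimiser step per `iter_size` sub-mini-batches; dividing each
    # sub-batch loss by iter_size makes the accumulated gradient an average.
    optimizer.zero_grad()
    for batch_idx, (data, target) in enumerate(loader):
        loss = loss_fn(model(data), target) / iter_size
        loss.backward()  # gradients add up across sub-batches
        if (batch_idx + 1) % iter_size == 0:
            optimizer.step()
            optimizer.zero_grad()
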
def validate(valid_loader, model, optimizer, valid_vars, control_vars, verbose=True):
    curr_epoch_iter = 1
    for batch_idx, (data, target) in enumerate(valid_loader):
        control_vars['batch_idx'] = batch_idx
        if batch_idx < control_vars['iter_size']:
            print_verbose("\rPerforming first iteration; current mini-batch: " +
                          str(batch_idx + 1) + "/" + str(control_vars['iter_size']), verbose, n_tabs=0, erase_line=True)
        # start time counter
        start = time.time()
        # get data and target as cuda variables
        target_heatmaps, target_joints, target_joints_z = target
        data, target_heatmaps = Variable(data), Variable(target_heatmaps)
        if valid_vars['use_cuda']:
            data = data.cuda()
            target_heatmaps = target_heatmaps.cuda()
        # visualize if debugging
        # get model output
        output = model(data)
        # accumulate loss for sub-mini-batch
        if valid_vars['cross_entropy']:
            loss_func = my_losses.cross_entropy_loss_p_logq
        else:
            loss_func = my_losses.euclidean_loss
        loss = my_losses.calculate_loss_HALNet(loss_func,
            output, target_heatmaps, model.joint_ixs, model.WEIGHT_LOSS_INTERMED1,
            model.WEIGHT_LOSS_INTERMED2, model.WEIGHT_LOSS_INTERMED3,
            model.WEIGHT_LOSS_MAIN, control_vars['iter_size'])

        if DEBUG_VISUALLY:
            for i in range(control_vars['max_mem_batch']):
                filenamebase_idx = (batch_idx * control_vars['max_mem_batch']) + i
                filenamebase = valid_loader.dataset.get_filenamebase(filenamebase_idx)
                fig = visualize.create_fig()
                #visualize.plot_joints_from_heatmaps(output[3][i].data.numpy(), fig=fig,
                #                                    title=filenamebase, data=data[i].data.numpy())
                #visualize.plot_image_and_heatmap(output[3][i][8].data.numpy(),
                #                                 data=data[i].data.numpy(),
                #                                 title=filenamebase)
                #visualize.savefig('/home/paulo/' + filenamebase.replace('/', '_') + '_heatmap')

                labels_colorspace = conv.heatmaps_to_joints_colorspace(output[3][i].data.numpy())
                data_crop, crop_coords, labels_heatmaps, labels_colorspace = \
                    converter.crop_image_get_labels(data[i].data.numpy(), labels_colorspace, range(21))
                visualize.plot_image(data_crop, title=filenamebase, fig=fig)
                visualize.plot_joints_from_colorspace(labels_colorspace, title=filenamebase, fig=fig, data=data_crop)
                #visualize.savefig('/home/paulo/' + filenamebase.replace('/', '_') + '_crop')
                visualize.show()

        #loss.backward()
        valid_vars['total_loss'] += loss
        # accumulate pixel dist loss for sub-mini-batch
        valid_vars['total_pixel_loss'] = my_losses.accumulate_pixel_dist_loss_multiple(
            valid_vars['total_pixel_loss'], output[3], target_heatmaps, control_vars['batch_size'])
        if valid_vars['cross_entropy']:
            valid_vars['total_pixel_loss_sample'] = my_losses.accumulate_pixel_dist_loss_from_sample_multiple(
                valid_vars['total_pixel_loss_sample'], output[3], target_heatmaps, control_vars['batch_size'])
        else:
            valid_vars['total_pixel_loss_sample'] = [-1] * len(model.joint_ixs)
        # get boolean variable stating whether a mini-batch has been completed
        minibatch_completed = (batch_idx+1) % control_vars['iter_size'] == 0
        if minibatch_completed:
            # append total loss
            valid_vars['losses'].append(valid_vars['total_loss'].item())
            # erase total loss
            total_loss = valid_vars['total_loss'].item()
            valid_vars['total_loss'] = 0
            # append dist loss
            valid_vars['pixel_losses'].append(valid_vars['total_pixel_loss'])
            # erase pixel dist loss
            valid_vars['total_pixel_loss'] = [0] * len(model.joint_ixs)
            # append dist loss of sample from output
            valid_vars['pixel_losses_sample'].append(valid_vars['total_pixel_loss_sample'])
            # erase dist loss of sample from output
            valid_vars['total_pixel_loss_sample'] = [0] * len(model.joint_ixs)
            # check if loss is better
            if valid_vars['losses'][-1] < valid_vars['best_loss']:
                valid_vars['best_loss'] = valid_vars['losses'][-1]
                #print_verbose("  This is a best loss found so far: " + str(valid_vars['losses'][-1]), verbose)
            # log checkpoint
            if control_vars['curr_iter'] % control_vars['log_interval'] == 0:
                trainer.print_log_info(model, optimizer, 1, total_loss, valid_vars, control_vars)
                model_dict = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'control_vars': control_vars,
                    'train_vars': valid_vars,  # note: validation vars are stored under the 'train_vars' key
                }
                trainer.save_checkpoint(model_dict,
                                        filename=valid_vars['checkpoint_filenamebase'] +
                                                 str(control_vars['num_iter']) + '.pth.tar')
            # print time lapse
            prefix = 'Validating (Epoch #' + str(1) + ' ' + str(control_vars['curr_epoch_iter']) + '/' +\
                     str(control_vars['tot_iter']) + ')' + ', (Batch ' + str(control_vars['batch_idx']+1) +\
                     '(' + str(control_vars['iter_size']) + ')' + '/' +\
                     str(control_vars['num_batches']) + ')' + ', (Iter #' + str(control_vars['curr_iter']) +\
                     '(' + str(control_vars['batch_size']) + ')' +\
                     ' - log every ' + str(control_vars['log_interval']) + ' iter): '
            control_vars['tot_toc'] = display_est_time_loop(control_vars['tot_toc'] + time.time() - start,
                                                            control_vars['curr_iter'], control_vars['num_iter'],
                                                            prefix=prefix)

            control_vars['curr_iter'] += 1
            control_vars['start_iter'] = control_vars['curr_iter'] + 1
            control_vars['curr_epoch_iter'] += 1


    return valid_vars, control_vars
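
Both functions above read epoch and the project helpers (trainer, my_losses, print_verbose, ...) from their enclosing module, so a driver script is expected to define them. A plausible outer loop, sketched with assumed names and an assumed epoch count:

num_epochs = 10  # assumed value for illustration
for epoch in range(1, num_epochs + 1):  # module-level 'epoch' is what train() reads
    control_vars['curr_epoch_iter'] = 1
    train_vars, control_vars = train(train_loader, model, optimizer,
                                     train_vars, control_vars)
    if control_vars['done_training']:
        break
    valid_vars, control_vars = validate(valid_loader, model, optimizer,
                                        valid_vars, control_vars)
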
Example #3 (the opening lines of this example are missing from the source)
            print('Continuing... {}/{}'.format(batch_idx, continue_batch_end_ix))
            continue
        train_vars['batch_idx'] = batch_idx
        train_vars['curr_iter'] = batch_idx + 1
        if args.use_cuda:
            data = data.cuda()
            label_heatmaps = label_heatmaps.cuda()


        # zero out torch gradients
        optimizer.zero_grad()

        # get model output
        output = model(data)

        loss = my_losses.calculate_loss_HALNet(loss_func,
                                               output, label_heatmaps, model.joint_ixs, model.WEIGHT_LOSS_INTERMED1,
                                               model.WEIGHT_LOSS_INTERMED2, model.WEIGHT_LOSS_INTERMED3,
                                               model.WEIGHT_LOSS_MAIN, train_vars['iter_size'])
        loss.backward()
        train_vars['total_loss'] = loss.item()
        train_vars['losses'].append(train_vars['total_loss'])
        if train_vars['total_loss'] < train_vars['best_loss']:
            train_vars['best_loss'] = train_vars['total_loss']

        optimizer.step()

        if batch_idx % args.log_interval == 0:
            trainer.print_log_info(model, optimizer, epoch, train_vars)
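
The examples choose between my_losses.euclidean_loss and my_losses.cross_entropy_loss_p_logq for the heatmap loss. The latter's name suggests a cross-entropy of the form -sum(p * log q) between target heatmaps p and predicted log-probabilities log q; a hedged sketch of one way such a function could look (an assumption, not the library's actual code):

def cross_entropy_loss_p_logq(p, logq, iter_size=1):
    # p: target heatmaps, assumed normalised to sum to 1 per joint;
    # logq: model output, assumed to be log-probabilities.
    # Averaged over the batch and scaled down for gradient accumulation.
    return -(p * logq).sum() / (p.size(0) * iter_size)
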
def train(train_loader, model, optimizer, train_vars):
    verbose = train_vars['verbose']
    for batch_idx, (data, target) in enumerate(train_loader):
        train_vars['batch_idx'] = batch_idx
        # print info about performing first iter
        if batch_idx < train_vars['iter_size']:
            print_verbose(
                "\rPerforming first iteration; current mini-batch: " +
                str(batch_idx + 1) + "/" + str(train_vars['iter_size']),
                verbose,
                n_tabs=0,
                erase_line=True)
        # check if arrived at iter to start
        arrived_curr_iter, train_vars = run_until_curr_iter(
            batch_idx, train_vars)
        if not arrived_curr_iter:
            continue
        # save checkpoint after final iteration
        if train_vars['curr_iter'] - 1 == train_vars['num_iter']:
            train_vars = trainer.save_final_checkpoint(train_vars, model,
                                                       optimizer)
            break
        # start time counter
        start = time.time()
        # get data and target as torch Variables
        _, target_joints, target_heatmaps, target_joints_z = target
        # make target joints be relative
        target_joints = target_joints[:, 3:]
        data, target_heatmaps = Variable(data), Variable(target_heatmaps)
        if train_vars['use_cuda']:
            data = data.cuda()
            target_heatmaps = target_heatmaps.cuda()
            target_joints = target_joints.cuda()
            target_joints_z = target_joints_z.cuda()
        # get model output
        output = model(data)

        # accumulate loss for sub-mini-batch
        if train_vars['cross_entropy']:
            loss_func = my_losses.cross_entropy_loss_p_logq
        else:
            loss_func = my_losses.euclidean_loss
        weights_heatmaps_loss, weights_joints_loss = get_loss_weights(
            train_vars['curr_iter'])
        loss, loss_heatmaps, loss_joints = my_losses.calculate_loss_JORNet(
            loss_func, output, target_heatmaps, target_joints,
            train_vars['joint_ixs'], weights_heatmaps_loss,
            weights_joints_loss, train_vars['iter_size'])
        loss.backward()
        train_vars['total_loss'] += loss.item()
        train_vars['total_joints_loss'] += loss_joints.item()
        train_vars['total_heatmaps_loss'] += loss_heatmaps.item()
        # accumulate pixel dist loss for sub-mini-batch
        train_vars[
            'total_pixel_loss'] = my_losses.accumulate_pixel_dist_loss_multiple(
                train_vars['total_pixel_loss'], output[3], target_heatmaps,
                train_vars['batch_size'])
        if train_vars['cross_entropy']:
            train_vars[
                'total_pixel_loss_sample'] = my_losses.accumulate_pixel_dist_loss_from_sample_multiple(
                    train_vars['total_pixel_loss_sample'], output[3],
                    target_heatmaps, train_vars['batch_size'])
        else:
            train_vars['total_pixel_loss_sample'] = [-1] * len(model.joint_ixs)
        '''
        For debugging training
        for i in range(train_vars['max_mem_batch']):
            filenamebase_idx = (batch_idx * train_vars['max_mem_batch']) + i
            filenamebase = train_loader.dataset.get_filenamebase(filenamebase_idx)
            visualize.plot_joints_from_heatmaps(target_heatmaps[i].data.cpu().numpy(),
                                                title='GT joints: ' + filenamebase, data=data[i].data.cpu().numpy())
            visualize.plot_joints_from_heatmaps(output[3][i].data.cpu().numpy(),
                                                title='Pred joints: ' + filenamebase, data=data[i].data.cpu().numpy())
            visualize.plot_image_and_heatmap(output[3][i][4].data.numpy(),
                                             data=data[i].data.numpy(),
                                             title='Thumb tip heatmap: ' + filenamebase)
            visualize.show()
        '''

        # get boolean variable stating whether a mini-batch has been completed
        minibatch_completed = (batch_idx + 1) % train_vars['iter_size'] == 0
        if minibatch_completed:
            # visualize
            # ax, fig = visualize.plot_3D_joints(target_joints[0])
            # visualize.plot_3D_joints(target_joints[1], ax=ax, fig=fig)
            if train_vars['curr_iter'] % train_vars['log_interval'] == 0:
                fig, ax = visualize.plot_3D_joints(target_joints[0])
                visualize.savefig('joints_GT_' + str(train_vars['curr_iter']) +
                                  '.png')
                #visualize.plot_3D_joints(target_joints[1], fig=fig, ax=ax, color_root='C7')
                #visualize.plot_3D_joints(output[7].data.cpu().numpy()[0], fig=fig, ax=ax, color_root='C7')
                visualize.plot_3D_joints(output[7].data.cpu().numpy()[0])
                visualize.savefig('joints_model_' +
                                  str(train_vars['curr_iter']) + '.png')
                #visualize.show()
                #visualize.savefig('joints_' + str(train_vars['curr_iter']) + '.png')
            # change learning rate to 0.01 after 45000 iterations
            optimizer = change_learning_rate(optimizer, 0.01,
                                             train_vars['curr_iter'])
            # optimise for mini-batch
            optimizer.step()
            # clear optimiser
            optimizer.zero_grad()
            # append total loss
            train_vars['losses'].append(train_vars['total_loss'])
            # erase total loss
            total_loss = train_vars['total_loss']
            train_vars['total_loss'] = 0
            # append total joints loss
            train_vars['losses_joints'].append(train_vars['total_joints_loss'])
            # erase total joints loss
            train_vars['total_joints_loss'] = 0
            # append total heatmaps loss
            train_vars['losses_heatmaps'].append(
                train_vars['total_heatmaps_loss'])
            # erase total heatmaps loss
            train_vars['total_heatmaps_loss'] = 0
            # append dist loss
            train_vars['pixel_losses'].append(train_vars['total_pixel_loss'])
            # erase pixel dist loss
            train_vars['total_pixel_loss'] = [0] * len(model.joint_ixs)
            # append dist loss of sample from output
            train_vars['pixel_losses_sample'].append(
                train_vars['total_pixel_loss_sample'])
            # erase dist loss of sample from output
            train_vars['total_pixel_loss_sample'] = [0] * len(model.joint_ixs)
            # check if loss is better
            if train_vars['losses'][-1] < train_vars['best_loss']:
                train_vars['best_loss'] = train_vars['losses'][-1]
                print_verbose(
                    "  New best loss found so far: " +
                    str(train_vars['losses'][-1]), verbose)
                train_vars['best_model_dict'] = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'train_vars': train_vars
                }
            # log checkpoint
            if train_vars['curr_iter'] % train_vars['log_interval'] == 0:
                trainer.print_log_info(model, optimizer, epoch, total_loss,
                                       train_vars, train_vars)
                gt_joints = target_joints[0].data.cpu().numpy()
                pred_joints = output[7][0].data.cpu().numpy()
                output_joint_loss = np.sum(np.abs(gt_joints - pred_joints)) / 63
                msg = ''
                msg += print_verbose(
                    "-------------------------------------------------------------------------------------------",
                    verbose) + "\n"
                msg += print_verbose(
                    '\tJoint Coord Avg Loss for first image of current mini-batch: '
                    + str(output_joint_loss) + '\n', train_vars['verbose'])
                msg += print_verbose(
                    "-------------------------------------------------------------------------------------------",
                    verbose) + "\n"
                if train_vars['output_filepath'] != '':
                    with open(train_vars['output_filepath'], 'a') as f:
                        f.write(msg + '\n')
            if train_vars['curr_iter'] % train_vars['log_interval_valid'] == 0:
                print_verbose(
                    "\nSaving model and checkpoint model for validation",
                    verbose)
                checkpoint_model_dict = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'train_vars': train_vars,
                }
                trainer.save_checkpoint(
                    checkpoint_model_dict,
                    filename=train_vars['checkpoint_filenamebase'] +
                    'for_valid_' + str(train_vars['curr_iter']) + '.pth.tar')

            # print time lapse
            prefix = 'Training (Epoch #' + str(epoch) + ' ' + str(train_vars['curr_epoch_iter']) + '/' +\
                     str(train_vars['tot_iter']) + ')' + ', (Batch ' + str(train_vars['batch_idx']+1) +\
                     '(' + str(train_vars['iter_size']) + ')' + '/' +\
                     str(train_vars['num_batches']) + ')' + ', (Iter #' + str(train_vars['curr_iter']) +\
                     '(' + str(train_vars['batch_size']) + ')' +\
                     ' - log every ' + str(train_vars['log_interval']) + ' iter): '
            train_vars['tot_toc'] = display_est_time_loop(
                train_vars['tot_toc'] + time.time() - start,
                train_vars['curr_iter'],
                train_vars['num_iter'],
                prefix=prefix)

            train_vars['curr_iter'] += 1
            train_vars['start_iter'] = train_vars['curr_iter'] + 1
            train_vars['curr_epoch_iter'] += 1
    return train_vars
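
change_learning_rate above is called with 0.01 and, per the comment, drops the learning rate after 45000 iterations. A minimal sketch of such a helper under that assumption:

def change_learning_rate(optimizer, new_lr, curr_iter, drop_iter=45000):
    # Assumed step schedule: once `drop_iter` is reached, set every
    # param group to `new_lr`; before that, leave the optimiser untouched.
    if curr_iter >= drop_iter:
        for param_group in optimizer.param_groups:
            param_group['lr'] = new_lr
    return optimizer
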
Example #5
def validate(valid_loader,
             model,
             optimizer,
             valid_vars,
             control_vars,
             verbose=True):
    curr_epoch_iter = 1
    for batch_idx, (data, target) in enumerate(valid_loader):
        control_vars['batch_idx'] = batch_idx
        if batch_idx < control_vars['iter_size']:
            print_verbose(
                "\rPerforming first iteration; current mini-batch: " +
                str(batch_idx + 1) + "/" + str(control_vars['iter_size']),
                verbose,
                n_tabs=0,
                erase_line=True)
        # start time counter
        start = time.time()
        # get data and target as cuda variables
        target_heatmaps, target_joints, target_handroot = target
        # make target joints be relative
        target_joints = target_joints[:, 3:]
        data, target_heatmaps = Variable(data), Variable(target_heatmaps)
        if valid_vars['use_cuda']:
            data = data.cuda()
            target_joints = target_joints.cuda()
            target_heatmaps = target_heatmaps.cuda()
            target_handroot = target_handroot.cuda()
        # visualize if debugging
        # get model output
        output = model(data)
        # accumulate loss for sub-mini-batch
        if model.cross_entropy:
            loss_func = my_losses.cross_entropy_loss_p_logq
        else:
            loss_func = my_losses.euclidean_loss
        weights_heatmaps_loss, weights_joints_loss = get_loss_weights(
            control_vars['curr_iter'])
        loss, loss_heatmaps, loss_joints = my_losses.calculate_loss_JORNet(
            loss_func, output, target_heatmaps, target_joints,
            valid_vars['joint_ixs'], weights_heatmaps_loss,
            weights_joints_loss, control_vars['iter_size'])

        for i in range(control_vars['max_mem_batch']):
            filenamebase_idx = (batch_idx * control_vars['max_mem_batch']) + i
            filenamebase = valid_loader.dataset.get_filenamebase(
                filenamebase_idx)

            print('')
            print(filenamebase)

            visualize.plot_image(data[i].data.cpu().numpy())
            visualize.show()

            output_batch_numpy = output[7][i].data.cpu().numpy()
            print('\n-------------------------------')
            reshaped_out = output_batch_numpy.reshape((20, 3))
            for j in range(20):
                print('[{}, {}, {}],'.format(reshaped_out[j, 0],
                                             reshaped_out[j, 1],
                                             reshaped_out[j, 2]))
            print('-------------------------------')
            fig, ax = visualize.plot_3D_joints(target_joints[i])
            visualize.plot_3D_joints(output_batch_numpy,
                                     fig=fig,
                                     ax=ax,
                                     color='C6')

            visualize.title(filenamebase)
            visualize.show()

            # prepend a root joint at the origin to get 21 absolute joints
            temp = np.zeros((21, 3))
            output_batch_numpy_abs = output_batch_numpy.reshape((20, 3))
            temp[1:, :] = output_batch_numpy_abs
            output_batch_numpy_abs = temp
            output_joints_colorspace = camera.joints_depth2color(
                output_batch_numpy_abs,
                depth_intr_matrix=synthhands_handler.DEPTH_INTR_MTX,
                handroot=target_handroot[i].data.cpu().numpy())
            visualize.plot_3D_joints(output_joints_colorspace)
            visualize.show()
            gt_joints = target_joints[i].data.cpu().numpy().reshape((20, 3))
            pred_joints = output[7][i].data.cpu().numpy().reshape((20, 3))
            print('\n----------------------------------')
            print(np.sum(np.abs(gt_joints - pred_joints)) / 60)
            print('----------------------------------')

        #loss.backward()
        valid_vars['total_loss'] += loss.item()
        valid_vars['total_joints_loss'] += loss_joints.item()
        valid_vars['total_heatmaps_loss'] += loss_heatmaps.item()
        # accumulate pixel dist loss for sub-mini-batch
        valid_vars[
            'total_pixel_loss'] = my_losses.accumulate_pixel_dist_loss_multiple(
                valid_vars['total_pixel_loss'], output[3], target_heatmaps,
                control_vars['batch_size'])
        valid_vars[
            'total_pixel_loss_sample'] = my_losses.accumulate_pixel_dist_loss_from_sample_multiple(
                valid_vars['total_pixel_loss_sample'], output[3],
                target_heatmaps, control_vars['batch_size'])
        # get boolean variable stating whether a mini-batch has been completed
        minibatch_completed = (batch_idx + 1) % control_vars['iter_size'] == 0
        if minibatch_completed:
            # append total loss
            valid_vars['losses'].append(valid_vars['total_loss'])
            # erase total loss
            total_loss = valid_vars['total_loss']
            valid_vars['total_loss'] = 0
            # append total joints loss
            valid_vars['losses_joints'].append(
                valid_vars['total_joints_loss'])
            # erase total joints loss
            valid_vars['total_joints_loss'] = 0
            # append total heatmaps loss
            valid_vars['losses_heatmaps'].append(
                valid_vars['total_heatmaps_loss'])
            # erase total heatmaps loss
            valid_vars['total_heatmaps_loss'] = 0
            # append dist loss
            valid_vars['pixel_losses'].append(valid_vars['total_pixel_loss'])
            # erase pixel dist loss
            valid_vars['total_pixel_loss'] = [0] * len(model.joint_ixs)
            # append dist loss of sample from output
            valid_vars['pixel_losses_sample'].append(
                valid_vars['total_pixel_loss_sample'])
            # erase dist loss of sample from output
            valid_vars['total_pixel_loss_sample'] = [0] * len(model.joint_ixs)
            # check if loss is better
            #if valid_vars['losses'][-1] < valid_vars['best_loss']:
            #    valid_vars['best_loss'] = valid_vars['losses'][-1]
            #    print_verbose("  This is a best loss found so far: " + str(valid_vars['losses'][-1]), verbose)
            # log checkpoint
            if control_vars['curr_iter'] % control_vars['log_interval'] == 0:
                trainer.print_log_info(model, optimizer, 1, total_loss,
                                       valid_vars, control_vars)
                model_dict = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'control_vars': control_vars,
                    'train_vars': valid_vars,
                }
                trainer.save_checkpoint(
                    model_dict,
                    filename=valid_vars['checkpoint_filenamebase'] +
                    str(control_vars['num_iter']) + '.pth.tar')
            # print time lapse
            prefix = 'Validating (Epoch #' + str(1) + ' ' + str(control_vars['curr_epoch_iter']) + '/' +\
                     str(control_vars['tot_iter']) + ')' + ', (Batch ' + str(control_vars['batch_idx']+1) +\
                     '(' + str(control_vars['iter_size']) + ')' + '/' +\
                     str(control_vars['num_batches']) + ')' + ', (Iter #' + str(control_vars['curr_iter']) +\
                     '(' + str(control_vars['batch_size']) + ')' +\
                     ' - log every ' + str(control_vars['log_interval']) + ' iter): '
            control_vars['tot_toc'] = display_est_time_loop(
                control_vars['tot_toc'] + time.time() - start,
                control_vars['curr_iter'],
                control_vars['num_iter'],
                prefix=prefix)

            control_vars['curr_iter'] += 1
            control_vars['start_iter'] = control_vars['curr_iter'] + 1
            control_vars['curr_epoch_iter'] += 1

    return valid_vars, control_vars
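
Every example saves the same checkpoint dict (model state, optimiser state, bookkeeping) through trainer.save_checkpoint, which presumably wraps torch.save. A sketch of a matching save/resume pair, assumed rather than taken from the trainer module:

import torch

def save_checkpoint(state, filename='checkpoint.pth.tar'):
    # Serialise the whole checkpoint dict (weights + bookkeeping) to disk.
    torch.save(state, filename)

def load_checkpoint(filename, model, optimizer):
    # Restore weights and optimiser state; return the bookkeeping dicts.
    state = torch.load(filename)
    model.load_state_dict(state['model_state_dict'])
    optimizer.load_state_dict(state['optimizer_state_dict'])
    return state['train_vars'], state.get('control_vars')
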
Example #6
def validate(valid_loader,
             model,
             optimizer,
             valid_vars,
             control_vars,
             verbose=True):
    losses_main = []
    for batch_idx, (data, target) in enumerate(valid_loader):
        control_vars['batch_idx'] = batch_idx
        if batch_idx < control_vars['iter_size']:
            print_verbose(
                "\rPerforming first iteration; current mini-batch: " +
                str(batch_idx + 1) + "/" + str(control_vars['iter_size']),
                verbose,
                n_tabs=0,
                erase_line=True)
        # start time counter
        start = time.time()
        # get data and target as cuda variables
        target_heatmaps, target_joints, target_handroot = target
        # make target joints be relative
        target_joints = target_joints[:, 3:]
        data, target_heatmaps = Variable(data), Variable(target_heatmaps)
        if valid_vars['use_cuda']:
            data = data.cuda()
            target_joints = target_joints.cuda()
            target_heatmaps = target_heatmaps.cuda()
            target_handroot = target_handroot.cuda()
        # visualize if debugging
        # get model output
        output = model(data)
        # accumulate loss for sub-mini-batch
        if model.cross_entropy:
            loss_func = my_losses.cross_entropy_loss_p_logq
        else:
            loss_func = my_losses.euclidean_loss
        weights_heatmaps_loss, weights_joints_loss = get_loss_weights(
            control_vars['curr_iter'])
        loss, loss_heatmaps, loss_joints, loss_main = my_losses.calculate_loss_JORNet_for_valid(
            loss_func, output, target_heatmaps, target_joints,
            valid_vars['joint_ixs'], weights_heatmaps_loss,
            weights_joints_loss, control_vars['iter_size'])
        losses_main.append(loss_main.item() / 63.0)
        valid_vars['total_loss'] += loss
        valid_vars['total_joints_loss'] += loss_joints
        valid_vars['total_heatmaps_loss'] += loss_heatmaps
        # accumulate pixel dist loss for sub-mini-batch
        valid_vars[
            'total_pixel_loss'] = my_losses.accumulate_pixel_dist_loss_multiple(
                valid_vars['total_pixel_loss'], output[3], target_heatmaps,
                control_vars['batch_size'])
        valid_vars[
            'total_pixel_loss_sample'] = my_losses.accumulate_pixel_dist_loss_from_sample_multiple(
                valid_vars['total_pixel_loss_sample'], output[3],
                target_heatmaps, control_vars['batch_size'])
        # get boolean variable stating whether a mini-batch has been completed
        minibatch_completed = (batch_idx + 1) % control_vars['iter_size'] == 0
        if minibatch_completed:
            # append total loss
            valid_vars['losses'].append(valid_vars['total_loss'].item())
            # erase total loss
            total_loss = valid_vars['total_loss'].item()
            valid_vars['total_loss'] = 0
            # append total joints loss
            valid_vars['losses_joints'].append(
                valid_vars['total_joints_loss'].item())
            # erase total joints loss
            valid_vars['total_joints_loss'] = 0
            # append total heatmaps loss
            valid_vars['losses_heatmaps'].append(
                valid_vars['total_heatmaps_loss'].item())
            # erase total heatmaps loss
            valid_vars['total_heatmaps_loss'] = 0
            # append dist loss
            valid_vars['pixel_losses'].append(valid_vars['total_pixel_loss'])
            # erase pixel dist loss
            valid_vars['total_pixel_loss'] = [0] * len(model.joint_ixs)
            # append dist loss of sample from output
            valid_vars['pixel_losses_sample'].append(
                valid_vars['total_pixel_loss_sample'])
            # erase dist loss of sample from output
            valid_vars['total_pixel_loss_sample'] = [0] * len(model.joint_ixs)
            # check if loss is better
            #if valid_vars['losses'][-1] < valid_vars['best_loss']:
            #    valid_vars['best_loss'] = valid_vars['losses'][-1]
            #    print_verbose("  This is a best loss found so far: " + str(valid_vars['losses'][-1]), verbose)
            # log checkpoint
            if control_vars['curr_iter'] % control_vars['log_interval'] == 0:
                trainer.print_log_info(model,
                                       optimizer,
                                       1,
                                       total_loss,
                                       valid_vars,
                                       control_vars,
                                       save_best=False,
                                       save_a_checkpoint=False)
                model_dict = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'control_vars': control_vars,
                    'train_vars': valid_vars,
                }
                #trainer.save_checkpoint(model_dict,
                #                        filename=valid_vars['checkpoint_filenamebase'] +
                #                                 str(control_vars['num_iter']) + '.pth.tar')
            # print time lapse
            prefix = 'Validating (Epoch #' + str(1) + ' ' + str(control_vars['curr_epoch_iter']) + '/' +\
                     str(control_vars['tot_iter']) + ')' + ', (Batch ' + str(control_vars['batch_idx']+1) +\
                     '(' + str(control_vars['iter_size']) + ')' + '/' +\
                     str(control_vars['num_batches']) + ')' + ', (Iter #' + str(control_vars['curr_iter']) +\
                     '(' + str(control_vars['batch_size']) + ')' +\
                     ' - log every ' + str(control_vars['log_interval']) + ' iter): '
            control_vars['tot_toc'] = display_est_time_loop(
                control_vars['tot_toc'] + time.time() - start,
                control_vars['curr_iter'],
                control_vars['num_iter'],
                prefix=prefix)

            control_vars['curr_iter'] += 1
            control_vars['start_iter'] = control_vars['curr_iter'] + 1
            control_vars['curr_epoch_iter'] += 1

    total_avg_loss = np.mean(losses_main)
    return valid_vars, control_vars, total_avg_loss
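
losses_main above divides by 63.0, and the debug prints in Example #5 divide by 60: both are the summed absolute coordinate error averaged over n_joints x 3 values (21 joints with the root, 20 relative joints without it). The same metric as a small numpy helper:

import numpy as np

def mean_abs_coord_error(gt_joints, pred_joints):
    # gt_joints, pred_joints: (n_joints, 3) arrays; returns the average
    # absolute error per coordinate, matching the '/ 63' and '/ 60' prints.
    return np.sum(np.abs(gt_joints - pred_joints)) / gt_joints.size
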
Example #7
def train(train_loader, model, optimizer, train_vars):
    verbose = train_vars['verbose']
    for batch_idx, (data, target) in enumerate(train_loader):
        train_vars['batch_idx'] = batch_idx
        # print info about performing first iter
        if batch_idx < train_vars['iter_size']:
            print_verbose("\rPerforming first iteration; current mini-batch: " +
                  str(batch_idx+1) + "/" + str(train_vars['iter_size']), verbose, n_tabs=0, erase_line=True)
        # check if arrived at iter to start
        arrived_curr_iter, train_vars = run_until_curr_iter(batch_idx, train_vars)
        if not arrived_curr_iter:
            continue
        # save checkpoint after final iteration
        if train_vars['curr_iter'] - 1 == train_vars['num_iter']:
            train_vars = save_final_checkpoint(train_vars, model, optimizer)
            break
        # start time counter
        start = time.time()
        # get data and target as torch Variables
        _, target_joints, target_heatmaps, target_joints_z = target
        data, target_heatmaps = Variable(data), Variable(target_heatmaps)
        if train_vars['use_cuda']:
            data = data.cuda()
            target_heatmaps = target_heatmaps.cuda()
        # get model output
        output = model(data)
        # accumulate loss for sub-mini-batch
        if model.cross_entropy:
            loss_func = my_losses.cross_entropy_loss_p_logq
        else:
            loss_func = my_losses.euclidean_loss
        loss = my_losses.calculate_loss_HALNet(loss_func,
            output, target_heatmaps, model.joint_ixs, model.WEIGHT_LOSS_INTERMED1,
            model.WEIGHT_LOSS_INTERMED2, model.WEIGHT_LOSS_INTERMED3,
            model.WEIGHT_LOSS_MAIN, train_vars['iter_size'])
        loss.backward()
        train_vars['total_loss'] += loss
        # accumulate pixel dist loss for sub-mini-batch
        train_vars['total_pixel_loss'] = my_losses.accumulate_pixel_dist_loss_multiple(
            train_vars['total_pixel_loss'], output[3], target_heatmaps, train_vars['batch_size'])
        if train_vars['cross_entropy']:
            train_vars['total_pixel_loss_sample'] = my_losses.accumulate_pixel_dist_loss_from_sample_multiple(
                train_vars['total_pixel_loss_sample'], output[3], target_heatmaps, train_vars['batch_size'])
        else:
            train_vars['total_pixel_loss_sample'] = [-1] * len(model.joint_ixs)
        # get boolean variable stating whether a mini-batch has been completed
        minibatch_completed = (batch_idx+1) % train_vars['iter_size'] == 0
        if minibatch_completed:
            # optimise for mini-batch
            optimizer.step()
            # clear optimiser
            optimizer.zero_grad()
            # append total loss
            train_vars['losses'].append(train_vars['total_loss'].item())
            # erase total loss
            total_loss = train_vars['total_loss'].item()
            train_vars['total_loss'] = 0
            # append dist loss
            train_vars['pixel_losses'].append(train_vars['total_pixel_loss'])
            # erase pixel dist loss
            train_vars['total_pixel_loss'] = [0] * len(model.joint_ixs)
            # append dist loss of sample from output
            train_vars['pixel_losses_sample'].append(train_vars['total_pixel_loss_sample'])
            # erase dist loss of sample from output
            train_vars['total_pixel_loss_sample'] = [0] * len(model.joint_ixs)
            # check if loss is better
            if train_vars['losses'][-1] < train_vars['best_loss']:
                train_vars['best_loss'] = train_vars['losses'][-1]
                print_verbose("  This is a best loss found so far: " + str(train_vars['losses'][-1]), verbose)
                train_vars['best_model_dict'] = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'train_vars': train_vars
                }
            # log checkpoint
            if train_vars['curr_iter'] % train_vars['log_interval'] == 0:
                trainer.print_log_info(model, optimizer, epoch, total_loss, train_vars, train_vars)

            if train_vars['curr_iter'] % train_vars['log_interval_valid'] == 0:
                print_verbose("\nSaving model and checkpoint model for validation", verbose)
                checkpoint_model_dict = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'train_vars': train_vars,
                }
                trainer.save_checkpoint(checkpoint_model_dict,
                                        filename=train_vars['checkpoint_filenamebase'] + 'for_valid_' +
                                                 str(train_vars['curr_iter']) + '.pth.tar')

            # print time lapse
            prefix = 'Training (Epoch #' + str(epoch) + ' ' + str(train_vars['curr_epoch_iter']) + '/' +\
                     str(train_vars['tot_iter']) + ')' + ', (Batch ' + str(train_vars['batch_idx']+1) +\
                     '(' + str(train_vars['iter_size']) + ')' + '/' +\
                     str(train_vars['num_batches']) + ')' + ', (Iter #' + str(train_vars['curr_iter']) +\
                     '(' + str(train_vars['batch_size']) + ')' +\
                     ' - log every ' + str(train_vars['log_interval']) + ' iter): '
            train_vars['tot_toc'] = display_est_time_loop(train_vars['tot_toc'] + time.time() - start,
                                                            train_vars['curr_iter'], train_vars['num_iter'],
                                                            prefix=prefix)

            train_vars['curr_iter'] += 1
            train_vars['start_iter'] = train_vars['curr_iter'] + 1
            train_vars['curr_epoch_iter'] += 1
    return train_vars
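
The validation examples convert predicted heatmaps to pixel joints via conv.heatmaps_to_joints_colorspace. The usual implementation is a per-channel argmax; a sketch under that assumption (not the helper's actual code):

import numpy as np

def heatmaps_to_joints(heatmaps):
    # heatmaps: (n_joints, H, W); returns (n_joints, 2) (row, col) pixel
    # coordinates by taking the argmax of each joint's heatmap.
    n_joints, h, w = heatmaps.shape
    flat_ix = heatmaps.reshape(n_joints, -1).argmax(axis=1)
    return np.stack(np.unravel_index(flat_ix, (h, w)), axis=1)
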
def train(train_loader, model, optimizer, train_vars, control_vars, verbose=True):
    curr_epoch_iter = 1
    for batch_idx, (data, target) in enumerate(train_loader):
        control_vars['batch_idx'] = batch_idx
        if batch_idx < control_vars['iter_size']:
            print_verbose("\rPerforming first iteration; current mini-batch: " +
                  str(batch_idx+1) + "/" + str(control_vars['iter_size']), verbose, n_tabs=0, erase_line=True)
        # check if arrived at iter to start
        if control_vars['curr_epoch_iter'] < control_vars['start_iter_mod']:
            control_vars['curr_epoch_iter'] = control_vars['start_iter_mod']  # fast-forward the counter to the saved iteration
            msg = ''
            if batch_idx % control_vars['iter_size'] == 0:
                msg += print_verbose("\rGoing through iterations to arrive at last one saved... " +
                      str(int(control_vars['curr_epoch_iter']*100.0/control_vars['start_iter_mod'])) + "% of " +
                      str(control_vars['start_iter_mod']) + " iterations (" +
                      str(control_vars['curr_epoch_iter']) + "/" + str(control_vars['start_iter_mod']) + ")",
                              verbose, n_tabs=0, erase_line=True)
                control_vars['curr_epoch_iter'] += 1
                control_vars['curr_iter'] += 1
                curr_epoch_iter += 1
            if control_vars['output_filepath'] != '':
                with open(control_vars['output_filepath'], 'a') as f:
                    f.write(msg + '\n')
            continue
        # save checkpoint after final iteration
        if control_vars['curr_iter'] == control_vars['num_iter']:
            print_verbose("\nReached final number of iterations: " + str(control_vars['num_iter']), verbose)
            print_verbose("\tSaving final model checkpoint...", verbose)
            final_model_dict = {
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'control_vars': control_vars,
                'train_vars': train_vars,
            }
            trainer.save_checkpoint(final_model_dict,
                            filename=train_vars['checkpoint_filenamebase'] +
                                     'final' + str(control_vars['num_iter']) + '.pth.tar')
            control_vars['done_training'] = True
            break
        # start time counter
        start = time.time()
        # get data and target as cuda variables
        target_heatmaps, target_joints, target_roothand = target
        data, target_heatmaps, target_joints, target_roothand = Variable(data), Variable(target_heatmaps),\
                                               Variable(target_joints), Variable(target_roothand)
        if train_vars['use_cuda']:
            data = data.cuda()
            target_heatmaps = target_heatmaps.cuda()
            target_joints = target_joints.cuda()
        # get model output
        output = model(data)
        '''
        visualize.plot_joints_from_heatmaps(target_heatmaps[0, :, :, :].cpu().data.numpy(),
                                            title='', data=data[0].cpu().data.numpy())
        visualize.show()
        visualize.plot_image_and_heatmap(target_heatmaps[0][4].cpu().data.numpy(),
                                         data=data[0].cpu().data.numpy(),
                                         title='')
        visualize.show()
        visualize.plot_image_and_heatmap(output[3][0][4].cpu().data.numpy(),
                                         data=data[0].cpu().data.numpy(),
                                         title='')
        visualize.show()
        '''
        # accumulate loss for sub-mini-batch
        if train_vars['cross_entropy']:
            loss_func = my_losses.cross_entropy_loss_p_logq
        else:
            loss_func = my_losses.euclidean_loss
        weights_heatmaps_loss, weights_joints_loss = get_loss_weights(control_vars['curr_iter'])
        loss, loss_heatmaps, loss_joints = my_losses.calculate_loss_JORNet(
            loss_func, output, target_heatmaps, target_joints, train_vars['joint_ixs'],
            weights_heatmaps_loss, weights_joints_loss, control_vars['iter_size'])
        loss.backward()
        train_vars['total_loss'] += loss.item()
        train_vars['total_joints_loss'] += loss_joints.item()
        train_vars['total_heatmaps_loss'] += loss_heatmaps.item()
        # accumulate pixel dist loss for sub-mini-batch
        train_vars['total_pixel_loss'] = my_losses.accumulate_pixel_dist_loss_multiple(
            train_vars['total_pixel_loss'], output[3], target_heatmaps, control_vars['batch_size'])
        train_vars['total_pixel_loss_sample'] = my_losses.accumulate_pixel_dist_loss_from_sample_multiple(
            train_vars['total_pixel_loss_sample'], output[3], target_heatmaps, control_vars['batch_size'])
        # get boolean variable stating whether a mini-batch has been completed
        minibatch_completed = (batch_idx+1) % control_vars['iter_size'] == 0
        if minibatch_completed:
            # optimise for mini-batch
            optimizer.step()
            # clear optimiser
            optimizer.zero_grad()
            # append total loss
            train_vars['losses'].append(train_vars['total_loss'])
            # erase total loss
            total_loss = train_vars['total_loss']
            train_vars['total_loss'] = 0
            # append total joints loss
            train_vars['losses_joints'].append(train_vars['total_joints_loss'])
            # erase total joints loss
            train_vars['total_joints_loss'] = 0
            # append total heatmaps loss
            train_vars['losses_heatmaps'].append(train_vars['total_heatmaps_loss'])
            # erase total heatmaps loss
            train_vars['total_heatmaps_loss'] = 0
            # append dist loss
            train_vars['pixel_losses'].append(train_vars['total_pixel_loss'])
            # erase pixel dist loss
            train_vars['total_pixel_loss'] = [0] * len(model.joint_ixs)
            # append dist loss of sample from output
            train_vars['pixel_losses_sample'].append(train_vars['total_pixel_loss_sample'])
            # erase dist loss of sample from output
            train_vars['total_pixel_loss_sample'] = [0] * len(model.joint_ixs)
            # check if loss is better
            if train_vars['losses'][-1] < train_vars['best_loss']:
                train_vars['best_loss'] = train_vars['losses'][-1]
                print_verbose("  This is a best loss found so far: " + str(train_vars['losses'][-1]), verbose)
                train_vars['best_model_dict'] = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'control_vars': control_vars,
                    'train_vars': train_vars,
                }
            if train_vars['losses_joints'][-1] < train_vars['best_loss_joints']:
                train_vars['best_loss_joints'] = train_vars['losses_joints'][-1]
            if train_vars['losses_heatmaps'][-1] < train_vars['best_loss_heatmaps']:
                train_vars['best_loss_heatmaps'] = train_vars['losses_heatmaps'][-1]
            # log checkpoint
            if control_vars['curr_iter'] % control_vars['log_interval'] == 0:
                trainer.print_log_info(model, optimizer, epoch, total_loss, train_vars, control_vars)
                gt_joints = target_joints[0].data.cpu().numpy()
                pred_joints = output[7][0].data.cpu().numpy()
                output_joint_loss = np.sum(np.abs(gt_joints - pred_joints)) / 63
                msg = ''
                msg += print_verbose(
                    "-------------------------------------------------------------------------------------------",
                    verbose) + "\n"
                msg += print_verbose('\tJoint Coord Avg Loss for first image of current mini-batch: ' +
                                     str(output_joint_loss) + '\n', control_vars['verbose'])
                msg += print_verbose(
                    "-------------------------------------------------------------------------------------------",
                    verbose) + "\n"
                if control_vars['output_filepath'] != '':
                    with open(control_vars['output_filepath'], 'a') as f:
                        f.write(msg + '\n')
            if control_vars['curr_iter'] % control_vars['log_interval_valid'] == 0:
                print_verbose("\nSaving model and checkpoint model for validation", verbose)
                checkpoint_model_dict = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'control_vars': control_vars,
                    'train_vars': train_vars,
                }
                trainer.save_checkpoint(checkpoint_model_dict,
                                        filename=train_vars['checkpoint_filenamebase'] + 'for_valid_' +
                                                 str(control_vars['curr_iter']) + '.pth.tar')



            # print time lapse
            prefix = 'Training (Epoch #' + str(epoch) + ' ' + str(control_vars['curr_epoch_iter']) + '/' +\
                     str(control_vars['tot_iter']) + ')' + ', (Batch ' + str(control_vars['batch_idx']+1) +\
                     '(' + str(control_vars['iter_size']) + ')' + '/' +\
                     str(control_vars['num_batches']) + ')' + ', (Iter #' + str(control_vars['curr_iter']) +\
                     '(' + str(control_vars['batch_size']) + ')' +\
                     ' - log every ' + str(control_vars['log_interval']) + ' iter): '
            control_vars['tot_toc'] = display_est_time_loop(control_vars['tot_toc'] + time.time() - start,
                                                            control_vars['curr_iter'], control_vars['num_iter'],
                                                            prefix=prefix)

            control_vars['curr_iter'] += 1
            control_vars['start_iter'] = control_vars['curr_iter'] + 1
            control_vars['curr_epoch_iter'] += 1


    return train_vars, control_vars
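
All JORNet examples call get_loss_weights(curr_iter) to balance heatmap and joint losses over training, but the function itself is not shown. A hedged sketch of one possible schedule; the weight values and the four-output layout (three intermediate heads plus the main one) are assumptions:

def get_loss_weights(curr_iter, switch_iter=45000):
    # Assumed schedule: weight heatmap supervision heavily early on,
    # then shift the emphasis to the 3D joint regression.
    if curr_iter < switch_iter:
        return [0.5, 0.5, 0.5, 1.0], [0.1, 0.1, 0.1, 0.2]
    return [0.1, 0.1, 0.1, 0.2], [0.5, 0.5, 0.5, 1.0]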