Example #1
    def train(args,
              epoch,
              start_iteration,
              data_loader,
              model,
              optimizer,
              loss,
              logger,
              is_validate=False,
              offset=0):
        statistics = []
        total_loss = 0
        gpu_mem = tools.gpumemusage()

        if is_validate:
            model.eval()
            title = 'Validating {} Epoch {}'.format(gpu_mem, epoch)
            args.validation_n_batches = np.inf if args.validation_n_batches < 0 else args.validation_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=100,
                            total=np.minimum(len(data_loader),
                                             args.validation_n_batches),
                            leave=True,
                            position=offset,
                            desc=title)
        else:
            model.train()
            title = 'Training {} Epoch {}'.format(tools.gpumemusage(), epoch)
            args.train_n_batches = np.inf if args.train_n_batches < 0 else args.train_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=120,
                            total=np.minimum(len(data_loader),
                                             args.train_n_batches),
                            smoothing=.9,
                            miniters=1,
                            leave=True,
                            position=offset,
                            desc=title)

        last_log_time = progress._time()
        for batch_idx, (data, target) in enumerate(progress):

            data, target = [Variable(d, volatile=is_validate) for d in data], [
                Variable(t, volatile=is_validate) for t in target
            ]
            if args.cuda:
                data, target = [d.cuda(async=True) for d in data
                                ], [t.cuda(async=True) for t in target]

            optimizer.zero_grad() if not is_validate else None

            output = model(data[0])

            loss_labels, loss_values = loss(output, target[0])

            loss_val = loss_values[0]
            total_loss += loss_val.data[0]
            loss_values = [v.data[0] for v in loss_values]

            assert not np.isnan(total_loss)

            if not is_validate and args.fp16:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)

                params = list(model.parameters())
                for i in range(len(params)):
                    param_copy[i].grad = params[i].grad.clone().type_as(
                        params[i]).detach()
                    param_copy[i].grad.mul_(1. / args.loss_scale)
                optimizer.step()
                for i in range(len(params)):
                    params[i].data.copy_(param_copy[i].data)

            elif not is_validate:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)
                optimizer.step()

            # Update hyperparameters if needed
            global_iteration = start_iteration + batch_idx
            if not is_validate:
                tools.update_hyperparameter_schedule(args, epoch,
                                                     global_iteration,
                                                     optimizer)
                loss_labels.append('lr')
                loss_values.append(optimizer.param_groups[0]['lr'])

            loss_labels.append('load')
            loss_values.append(progress.iterable.last_duration)

            # Print out statistics
            statistics.append(loss_values)
            title = '{} {} Epoch {}'.format(
                'Validating' if is_validate else 'Training',
                tools.gpumemusage(), epoch)

            progress.set_description(
                title + ' ' +
                tools.format_dictionary_of_losses(loss_labels, statistics[-1]))

            if ((((global_iteration + 1) % args.log_frequency) == 0
                 and not is_validate) or
                (is_validate and batch_idx == args.validation_n_batches - 1)):

                global_iteration = global_iteration if not is_validate else start_iteration

                logger.add_scalar(
                    'batch logs per second',
                    len(statistics) / (progress._time() - last_log_time),
                    global_iteration)
                last_log_time = progress._time()

                all_losses = np.array(statistics)

                for i, key in enumerate(loss_labels):
                    logger.add_scalar('average batch ' + key,
                                      all_losses[:,
                                                 i].mean(), global_iteration)
                    logger.add_histogram(key, all_losses[:, i],
                                         global_iteration)

            # Reset Summary
            statistics = []

            if (is_validate and (batch_idx == args.validation_n_batches)):
                break

            if ((not is_validate) and (batch_idx == (args.train_n_batches))):
                break

        progress.close()

        return total_loss / float(batch_idx + 1), (batch_idx + 1)
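
Note that `param_copy` in the fp16 branch above is not defined inside `train`; in flownet2-pytorch-style launch scripts it is a module-level fp32 master copy of the (fp16) model parameters, built once before training. A minimal sketch of that setup, assuming such a script (the optimizer choice and learning rate here are placeholders, not taken from the snippet):

    # Hedged sketch of the fp32 master-copy setup the fp16 branch above relies on.
    # Gradients are computed on the fp16 model, copied into param_copy, un-scaled,
    # stepped by the optimizer, and finally copied back into the fp16 model.
    param_copy = [p.clone().type(torch.cuda.FloatTensor).detach()
                  for p in model.parameters()]
    for p in param_copy:
        p.requires_grad = True
    optimizer = torch.optim.Adam(param_copy, lr=1e-4)  # optimizer steps on the fp32 copy
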
Example #2
    def inference(args, epoch, data_loader, model, loss, offset=0):

        model.eval()

        if args.save_flow or args.render_validation:
            flow_folder = "{}/{}.epoch-{}-flow-field".format(
                args.inference_dir, args.name.replace('/', '.'), epoch)
            if not os.path.exists(flow_folder):
                os.makedirs(flow_folder)

        gpu_mem = tools.gpumemusage()
        args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches

        progress = tqdm(data_loader,
                        ncols=100,
                        total=np.minimum(len(data_loader),
                                         args.inference_n_batches),
                        desc='Inferencing %s' % (gpu_mem),
                        leave=True,
                        position=offset)

        statistics = []
        total_loss = 0
        for batch_idx, (data, target) in enumerate(progress):
            if args.cuda:
                data, target = [d.cuda(async=True) for d in data
                                ], [t.cuda(async=True) for t in target]
            data, target = [Variable(d, volatile=True) for d in data
                            ], [Variable(t, volatile=True) for t in target]

            output = [model(data[0])]

            if len(target) == 0:
                target = [output[0]]

            loss_labels, loss_values = loss(output[0], target[0])

            loss_val = loss_values[0]

            total_loss += loss_val.data[0]
            statistics.append([v.data[0] for v in loss_values])

            _pflow = output[0].data.cpu().numpy().transpose(0, 2, 3, 1)

            if args.save_flow or args.render_validation:
                for i in range(args.inference_batch_size):
                    flow_utils.writeFlow(
                        join(
                            flow_folder, '%06d.flo' %
                            (batch_idx * args.inference_batch_size + i)),
                        _pflow[i])

            progress.set_description('Inference {} Averages for Epoch {}: '.
                                     format(tools.gpumemusage(), epoch) +
                                     tools.format_dictionary_of_losses(
                                         loss_labels,
                                         np.array(statistics).mean(axis=0)))
            progress.update(1)

            if batch_idx == (args.inference_n_batches - 1):
                break

        progress.close()

        return
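
For reference, `flow_utils.writeFlow` stores each H×W×2 flow array in the Middlebury `.flo` layout. A minimal, self-contained sketch of that format (an illustration, not the repository's implementation):

    import numpy as np

    def write_flo(filename, flow):
        """Minimal .flo writer sketch; flow is an HxWx2 float32 array of (u, v)."""
        with open(filename, 'wb') as f:
            np.array([202021.25], dtype=np.float32).tofile(f)                   # magic number, reads back as 'PIEH'
            np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f)  # width, height
            flow.astype(np.float32).tofile(f)                                   # interleaved (u, v) per pixel, row-major
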
Example #3
    def inference(args, epoch, data_loader, model, offset=0):

        model.eval()

        if args.save_flow or args.render_validation:
            flow_folder = "{}/{}.epoch-{}-flow-field".format(
                args.inference_dir, args.name.replace('/', '.'), epoch)
            if not os.path.exists(flow_folder):
                os.makedirs(flow_folder)

        args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches

        progress = tqdm(data_loader,
                        ncols=100,
                        total=np.minimum(len(data_loader),
                                         args.inference_n_batches),
                        desc='Inferencing ',
                        leave=True,
                        position=offset)

        statistics = []
        total_loss = 0
        for batch_idx, (data, target) in enumerate(progress):
            if args.cuda:
                data, target = [d.cuda(async=True) for d in data
                                ], [t.cuda(async=True) for t in target]
            data, target = [Variable(d, volatile=True) for d in data
                            ], [Variable(t, volatile=True) for t in target]

            # when ground-truth flows are not available for inference_dataset,
            # the targets are set to all zeros. thus, losses are actually L1 or L2 norms of computed optical flows,
            # depending on the type of loss norm passed in
            losses, output = model(data[0], target[0], inference=True)

            losses = [torch.mean(loss_value) for loss_value in losses]
            loss_val = losses[0]  # Collect first loss for weight update
            total_loss += loss_val.data[0]
            loss_values = [v.data[0] for v in losses]

            # gather loss_labels; a direct return leads to a recursion limit error as it looks for variables to gather
            loss_labels = list(model.module.loss.loss_labels)

            statistics.append(loss_values)
            # import IPython; IPython.embed()
            if args.save_flow or args.render_validation:
                for i in range(args.inference_batch_size):
                    _pflow = output[i].data.cpu().numpy().transpose(1, 2, 0)
                    flow_utils.writeFlow(
                        join(
                            flow_folder, '%06d.flo' %
                            (batch_idx * args.inference_batch_size + i)),
                        _pflow)

            progress.set_description(
                'Inference Averages for Epoch {}: '.format(epoch) +
                tools.format_dictionary_of_losses(
                    loss_labels,
                    np.array(statistics).mean(axis=0)))
            progress.update(1)

            if batch_idx == (args.inference_n_batches - 1):
                break

        progress.close()

        return
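
Examples 1-3 target the pre-0.4 PyTorch API: `Variable(..., volatile=True)`, `.cuda(async=True)` and `loss.data[0]`. On current PyTorch (and Python 3.7+, where `async` is a reserved word) the equivalent inference step looks roughly like the following sketch, which is an assumption about the mapping, not part of the original snippets:

    # Rough modern equivalents of the legacy calls used above.
    data = [d.cuda(non_blocking=True) for d in data]        # replaces .cuda(async=True)
    target = [t.cuda(non_blocking=True) for t in target]
    with torch.no_grad():                                    # replaces Variable(..., volatile=True)
        output = model(data[0])
        loss_labels, loss_values = loss(output, target[0])
    total_loss += loss_values[0].item()                      # replaces loss.data[0]
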
Example #4
        def inference(args, epoch, data_loader, model, offset=0):
            model.eval()
            if args.save_flow or args.render_validation:
                flow_folder = "{}/inference/{}.epoch-{}-flow-field".format(args.save, args.name.replace('/', '.'),
                                                                           epoch)
                if not os.path.exists(flow_folder):
                    os.makedirs(flow_folder)

            # visualization folder
            if args.inference_visualize:
                flow_vis_folder = "{}/inference/{}.epoch-{}-flow-vis".format(args.save, args.name.replace('/', '.'),
                                                                             epoch)
                if not os.path.exists(flow_vis_folder):
                    os.makedirs(flow_vis_folder)

            args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches

            progress = tqdm(data_loader, ncols=100, total=np.minimum(len(data_loader), args.inference_n_batches),
                            desc='Inferencing ',
                            leave=True, position=offset)

            statistics = []
            total_loss = 0

            for batch_idx, (data, target, city_name, video_name, image_list) in enumerate(progress):
                city_name = city_name[0]
                video_name = video_name[0]
            
                # print('cur ', image_list[0][0], ' ', image_list[1][0])
                # print('city name = ', city_name)
                # print('video name = ', video_name)
                # if batch_idx == 0:
                #     name = []
                #     for i in filename:
                #         name.append(i[0])
                #         # print('st: ', i)
                # print('name = ', name)
                # if name[batch_idx] == '':
                #     continue
                if args.cuda:
                    data, target = [d.cuda(non_blocking=True) for d in data], [t.cuda(non_blocking=True) for t in
                                                                               target]
                data, target = [Variable(d) for d in data], [
                    Variable(t) for t in target]

                # when ground-truth flows are not available for inference_dataset,
                # the targets are set to all zeros. thus, losses are actually L1 or L2 norms of computed optical flows,
                # depending on the type of loss norm passed in
                with torch.no_grad():
                    losses, output = model(data[0], target[0], inference=True)

                losses = [torch.mean(loss_value) for loss_value in losses]
                loss_val = losses[0]  # Collect first loss for weight update
                total_loss += loss_val.item()
                loss_values = [v.item() for v in losses]

                # gather loss_labels; a direct return leads to a recursion limit error as it looks for variables to gather
                loss_labels = list(model.module.loss.loss_labels)

                statistics.append(loss_values)
                # import IPython; IPython.embed()
                if args.save_flow or args.render_validation:
                    for i in range(args.inference_batch_size):
                        _pflow = output[i].data.cpu(
                        ).numpy().transpose(1, 2, 0)
                        # print(_pflow.shape)
                        img0 = scipy.misc.toimage(_pflow[:, :, 0])
                        img1 = scipy.misc.toimage(_pflow[:, :, 1])

                        #id = name[batch_idx][15:-4]
                        id = image_list[1][0].split('/')[-1]
                        #print('Saving : ', id)
                        if not os.path.exists(crop_image_path + '/horizontal/' + city_name):
                            os.makedirs(crop_image_path +
                                        '/horizontal/' + city_name)
                        if not os.path.exists(crop_image_path + '/vertical/' + city_name):
                            os.makedirs(crop_image_path +
                                        '/vertical/' + city_name)
                        if not os.path.exists(crop_image_path + '/horizontal/' + city_name + '/' + video_name):
                            os.makedirs(
                                crop_image_path + '/horizontal/' + city_name + '/' + video_name)
                        if not os.path.exists(crop_image_path + '/vertical/' + city_name + '/' + video_name):
                            os.makedirs(
                                crop_image_path + '/vertical/' + city_name + '/' + video_name)
                        #block.log('?????? %s '%(name[batch_idx * args.effective_inference_batch_size + i]))
                        img0.save(
                            crop_image_path + '/horizontal/' + city_name + '/' + video_name + '/' + id)
                        img1.save(crop_image_path + '/vertical/' + city_name +
                                  '/' + video_name + '/' + id)
                        # img0.save(
                        #     '/home/wangsen/flownet2_testpic/optical_flow/horizontal/' + str(id) + '_' + str(
                        #         batch_idx) + '.png')
                        # img1.save(
                        #     '/home/wangsen/flownet2_testpic/optical_flow/vertical/' + str(id) + '_' + str(
                        #         batch_idx) + '.png')
                        # flow_utils.writeFlow(join(flow_folder, '%06d.flo' % (batch_idx * args.effective_inference_batch_size + i)),
                        #                      _pflow)

                        # You can comment out the plt block in visulize_flow_file() for real-time visualization
                        if args.inference_visualize:
                            flow_utils.visulize_flow_file(
                                join(flow_folder, '%06d.flo' %
                                     (batch_idx * args.inference_batch_size + i)),
                                flow_vis_folder)

                progress.set_description(
                    'Inference Averages for Epoch {}: '.format(epoch) + tools.format_dictionary_of_losses(loss_labels,
                                                                                                          np.array(
                                                                                                              statistics).mean(
                                                                                                              axis=0)))
                progress.update(1)

                if batch_idx == (args.inference_n_batches - 1):
                    break

            progress.close()

            return
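
Example 4 relies on `scipy.misc.toimage`, which was removed in SciPy 1.2, and on `crop_image_path`, which is likewise defined outside the snippet. A hedged PIL-based stand-in for the image conversion, assuming the intent is to min-max scale each flow channel to 0-255:

    import numpy as np
    from PIL import Image

    def channel_to_image(channel):
        """Approximate replacement for scipy.misc.toimage on a single flow channel."""
        c = channel.astype(np.float64)
        c = (c - c.min()) / max(float(c.max() - c.min()), 1e-8) * 255.0
        return Image.fromarray(c.astype(np.uint8))

    # img0 = channel_to_image(_pflow[:, :, 0])   # horizontal component
    # img1 = channel_to_image(_pflow[:, :, 1])   # vertical component
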
Example #5
    def train(input_args,
              train_epoch,
              start_iteration,
              files_loader,
              model,
              model_optimizer,
              logger,
              is_validate=False,
              offset=0):
        statistics = []
        total_loss = 0

        if is_validate:
            model.eval()
            title = 'Validating Epoch {}'.format(train_epoch)
            input_args.validation_n_batches = np.inf if input_args.validation_n_batches < 0 else input_args.validation_n_batches
            file_progress = tqdm(tools.IteratorTimer(files_loader),
                                 ncols=100,
                                 total=np.minimum(
                                     len(files_loader),
                                     input_args.validation_n_batches),
                                 leave=True,
                                 position=offset,
                                 desc=title)
        else:
            model.train()
            title = 'Training Epoch {}'.format(train_epoch)
            input_args.train_n_batches = np.inf if input_args.train_n_batches < 0 else input_args.train_n_batches
            file_progress = tqdm(tools.IteratorTimer(files_loader),
                                 ncols=120,
                                 total=np.minimum(len(files_loader),
                                                  input_args.train_n_batches),
                                 smoothing=.9,
                                 miniters=1,
                                 leave=True,
                                 position=offset,
                                 desc=title)

        last_log_time = file_progress._time()
        for batch_idx, (data_file) in enumerate(file_progress):
            video_dataset = datasets_video.VideoFileDataJIT(
                input_args, data_file[0])
            video_loader = DataLoader(video_dataset,
                                      batch_size=args.effective_batch_size,
                                      shuffle=True,
                                      **gpuargs)

            global_iteration = start_iteration + batch_idx

            # note~ for debugging purposes
            # video_frame_progress = tqdm(tools.IteratorTimer(video_loader), ncols=120,
            #                            total=len(video_loader), smoothing=0.9, miniters=1,
            #                            leave=True, desc=data_file[0])

            for i_batch, (data, target) in enumerate(video_loader):
                data, target = [Variable(d)
                                for d in data], [Variable(t) for t in target]
                if input_args.cuda and input_args.number_gpus == 1:
                    data, target = [d.cuda(async=True) for d in data
                                    ], [t.cuda(async=True) for t in target]

                model_optimizer.zero_grad() if not is_validate else None
                losses = model(data[0], target[0])
                losses = [torch.mean(loss_value) for loss_value in losses]
                loss_val = losses[0]  # Collect first loss for weight update
                total_loss += loss_val.data
                loss_values = [v.data for v in losses]

                # gather loss_labels; a direct return leads to a recursion limit error as it looks for variables to gather
                loss_labels = list(model.module.loss.loss_labels)

                assert not np.isnan(total_loss.cpu().numpy())

                if not is_validate and input_args.fp16:
                    loss_val.backward()
                    if input_args.gradient_clip:
                        torch.nn.utils.clip_grad_norm(model.parameters(),
                                                      input_args.gradient_clip)

                    params = list(model.parameters())
                    for i in range(len(params)):
                        param_copy[i].grad = params[i].grad.clone().type_as(
                            params[i]).detach()
                        param_copy[i].grad.mul_(1. / input_args.loss_scale)
                    model_optimizer.step()
                    for i in range(len(params)):
                        params[i].data.copy_(param_copy[i].data)
                elif not is_validate:
                    loss_val.backward()
                    if input_args.gradient_clip:
                        torch.nn.utils.clip_grad_norm(model.parameters(),
                                                      input_args.gradient_clip)
                    model_optimizer.step()

                # Update hyperparameters if needed
                if not is_validate:
                    tools.update_hyperparameter_schedule(
                        input_args, train_epoch, global_iteration,
                        model_optimizer)
                    loss_labels.append('lr')
                    loss_values.append(model_optimizer.param_groups[0]['lr'])

                    loss_labels.append('load')
                    loss_values.append(file_progress.iterable.last_duration)

            # Print out statistics
            statistics.append(loss_values)
            title = '{} Epoch {}'.format(
                'Validating' if is_validate else 'Training', train_epoch)

            file_progress.set_description(
                title + ' ' + tools.format_dictionary_of_losses(
                    tools.flatten_list(loss_labels), statistics[-1]))

            if ((((global_iteration + 1) % input_args.log_frequency) == 0
                 and not is_validate)
                    or (is_validate
                        and batch_idx == input_args.validation_n_batches - 1)):

                global_iteration = global_iteration if not is_validate else start_iteration

                logger.add_scalar(
                    'batch logs per second',
                    len(statistics) / (file_progress._time() - last_log_time),
                    global_iteration)
                last_log_time = file_progress._time()

                all_losses = np.array(statistics)

                for i, key in enumerate(tools.flatten_list(loss_labels)):
                    if isinstance(all_losses[:, i].item(), torch.Tensor):
                        average_batch = all_losses[:, i].item().mean()
                    else:
                        average_batch = all_losses[:, i].item()

                    logger.add_scalar('average batch ' + str(key),
                                      average_batch, global_iteration)
                    logger.add_histogram(str(key), all_losses[:, i],
                                         global_iteration)

            # Reset Summary
            statistics = []

            if is_validate and (batch_idx == input_args.validation_n_batches):
                break

            if (not is_validate) and (batch_idx
                                      == (input_args.train_n_batches)):
                break

        file_progress.close()

        return total_loss / float(batch_idx + 1), (batch_idx + 1)
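
Example 5 reads `args` and `gpuargs` inside the inner `DataLoader` construction even though the function receives `input_args`; both are assumed to be module-level globals of the launch script. A sketch of that setup as it is typically written in flownet2-style scripts (the names and values here are assumptions):

    # Hypothetical module-level setup assumed by the nested DataLoader above.
    gpuargs = {'num_workers': args.number_workers,
               'pin_memory': True} if args.cuda else {}
    # args.effective_batch_size is usually batch_size * number_gpus in these scripts.
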
Example #6
    def train(args,
              epoch,
              start_iteration,
              data_loader,
              model,
              optimizer,
              logger,
              is_validate=False,
              offset=0):
        statistics = []
        total_loss = 0

        if is_validate:
            model.eval()
            title = 'Validating Epoch {}'.format(epoch)
            args.validation_n_batches = len(
                data_loader
            ) - 1 if args.validation_n_batches < 0 else args.validation_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=100,
                            total=np.minimum(len(data_loader),
                                             args.validation_n_batches),
                            leave=True,
                            position=offset,
                            desc=title)
        else:
            model.train()
            title = 'Training Epoch {}'.format(epoch)
            args.train_n_batches = len(
                data_loader
            ) - 1 if args.train_n_batches < 0 else args.train_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=120,
                            total=np.minimum(len(data_loader),
                                             args.train_n_batches),
                            smoothing=.9,
                            miniters=1,
                            leave=True,
                            position=offset,
                            desc=title)

        last_log_time = progress._time()
        for batch_idx, (data, target) in enumerate(progress):

            data, target = [Variable(d, volatile=is_validate) for d in data], [
                Variable(t, volatile=is_validate) for t in target
            ]
            if args.cuda and args.number_gpus == 1:
                data, target = [d.cuda(async=True) for d in data
                                ], [t.cuda(async=True) for t in target]

            optimizer.zero_grad() if not is_validate else None
            losses = model(data[0], target[0])
            losses = [torch.mean(loss_value) for loss_value in losses]
            loss_val = losses[1]  # Collect the second loss term for the weight update
            total_loss += loss_val.data[0]
            loss_values = [v.data[0] for v in losses]

            # gather loss_labels; a direct return leads to a recursion limit error as it looks for variables to gather
            loss_labels = list(model.module.loss.loss_labels)

            assert not np.isnan(total_loss)

            if not is_validate and args.fp16:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)

                params = list(model.parameters())
                for i in range(len(params)):
                    param_copy[i].grad = params[i].grad.clone().type_as(
                        params[i]).detach()
                    param_copy[i].grad.mul_(1. / args.loss_scale)
                optimizer.step()
                for i in range(len(params)):
                    params[i].data.copy_(param_copy[i].data)

            elif not is_validate:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)
                optimizer.step()

            # Update hyperparameters if needed
            global_iteration = start_iteration + batch_idx
            if not is_validate:
                tools.update_hyperparameter_schedule(args, epoch,
                                                     global_iteration,
                                                     optimizer)
                loss_labels.append('lr')
                loss_values.append(optimizer.param_groups[0]['lr'])

            loss_labels.append('load')
            loss_values.append(progress.iterable.last_duration)

            # Print out statistics
            statistics.append(loss_values)
            title = '{} Epoch {}'.format(
                'Validating' if is_validate else 'Training', epoch)

            if isinstance(loss_labels[0], (list, tuple)):
                progress.set_description(title + ' ' +
                                         tools.format_dictionary_of_losses(
                                             loss_labels[0], statistics[-1]))
            else:
                progress.set_description(title + ' ' +
                                         tools.format_dictionary_of_losses(
                                             loss_labels, statistics[-1]))

            if ((((global_iteration + 1) % args.log_frequency) == 0
                 and not is_validate) or
                (is_validate and batch_idx == args.validation_n_batches - 1)):

                global_iteration = global_iteration if not is_validate else start_iteration

                logger.add_scalar(
                    'batch logs per second',
                    len(statistics) / (progress._time() - last_log_time),
                    global_iteration)
                last_log_time = progress._time()

                all_losses = np.array(statistics)

                for i, key in enumerate(loss_labels[0] if isinstance(
                        loss_labels[0], (list, tuple)) else loss_labels):
                    logger.add_scalar('average batch ' + str(key),
                                      all_losses[:,
                                                 i].mean(), global_iteration)
                    #logger.add_histogram(str(key), all_losses[:, i], global_iteration)
                if is_validate:
                    _, output = model(data[0], target[0], inference=True)
                    render_flow = output[0].data.cpu().numpy().transpose(
                        1, 2, 0)
                    ground_truth = target[0][0].data.cpu().numpy().transpose(
                        1, 2, 0)
                    render_img = tools.flow_to_image(render_flow).transpose(
                        2, 0, 1)
                    true_img = tools.flow_to_image(ground_truth).transpose(
                        2, 0, 1)
                    render_img = torch.Tensor(render_img) / 255.0
                    true_img = torch.Tensor(true_img) / 255.0
                    input_img = data[0][0, :, 0, :, :].data.cpu() / 255.0
                    logger.add_image('renderimg',
                                     torchvision.utils.make_grid(render_img),
                                     global_iteration)
                    logger.add_image('ground_truth',
                                     torchvision.utils.make_grid(true_img),
                                     global_iteration)
                    logger.add_image('input_img',
                                     torchvision.utils.make_grid(input_img),
                                     global_iteration)

            # Reset Summary
            statistics = []

            if (is_validate and (batch_idx == args.validation_n_batches)):
                break

            if ((not is_validate) and (batch_idx == (args.train_n_batches))):
                break

        progress.close()

        return total_loss / float(batch_idx + 1), (batch_idx + 1)
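
The validation rendering in Example 6 depends on `tools.flow_to_image`. A hedged, self-contained stand-in using the common HSV encoding (flow direction to hue, magnitude to brightness); it is not the repository's implementation:

    import cv2
    import numpy as np

    def flow_to_rgb(flow):
        """Visualize an HxWx2 flow field: direction -> hue, magnitude -> brightness."""
        mag, ang = cv2.cartToPolar(flow[..., 0].astype(np.float32),
                                   flow[..., 1].astype(np.float32))
        hsv = np.zeros((flow.shape[0], flow.shape[1], 3), dtype=np.uint8)
        hsv[..., 0] = (ang * 180 / np.pi / 2).astype(np.uint8)  # OpenCV hue range is [0, 180)
        hsv[..., 1] = 255
        hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
        return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
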
Example #7
    def inference(args, epoch, data_loader, model, offset=0):

        model.eval()

        # if args.save_prefeat:
        #     feat_folder = 'prefeat_kitti_odo'
        #     if not os.path.exists(feat_folder): os.mkdir(feat_folder)
        #     feat_folder = 'prefeat_kitti_odo/{}'.format(args.model)
        #     if not os.path.exists(feat_folder): os.mkdir(feat_folder)
        #     for _seq in ['00', '01', '02', '04', '05', '06', '07', '08', '09', '10']:
        #         feat_folder = 'prefeat_kitti_odo/{}/{}'.format(args.model, _seq)
        #         if not os.path.exists(feat_folder): os.mkdir(feat_folder)
        
        if args.save_flow or args.render_validation:
            flow_folder = "{}/inference/{}.epoch-{}-flow-field".format(args.save,args.name.replace('/', '.'),epoch)
            if not os.path.exists(flow_folder):
                os.makedirs(flow_folder)
        
        # visualization folder
        if args.inference_visualize:
            flow_vis_folder = "{}/inference/{}.epoch-{}-flow-vis".format(args.save, args.name.replace('/', '.'), epoch)
            if not os.path.exists(flow_vis_folder):
                os.makedirs(flow_vis_folder)
        
        args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches

        progress = tqdm(data_loader, ncols=100, total=np.minimum(len(data_loader), args.inference_n_batches), desc='Inferencing ', 
            leave=True, position=offset)

        statistics = []
        total_loss = 0
        for batch_idx, (data, target, outname) in enumerate(progress):
            # # outname: e.g. [['00-000000_000001']]
            # outname = outname[0][0] # '00-000000_000001'
            # outseq  = outname.split('-')[0] # '00'
            if args.cuda:
                data, target = [d.cuda(non_blocking=True) for d in data], [t.cuda(non_blocking=True) for t in target]
            data, target = [Variable(d) for d in data], [Variable(t) for t in target]

            # when ground-truth flows are not available for inference_dataset,
            # the targets are set to all zeros. thus, losses are actually L1 or L2 norms of computed optical flows,
            # depending on the type of loss norm passed in
            with torch.no_grad():
                losses, output, out_feats = model(data[0], target[0], inference=True)

            losses = [torch.mean(loss_value) for loss_value in losses] 
            loss_val = losses[0] # Collect first loss for weight update
            total_loss += loss_val.item()
            loss_values = [v.item() for v in losses]

            # gather loss_labels; a direct return leads to a recursion limit error as it looks for variables to gather
            loss_labels = list(model.module.loss.loss_labels)

            statistics.append(loss_values)
            # import IPython; IPython.embed()

            # if args.save_prefeat:
            #     torch.save(out_feats, 'prefeat_kitti_odo/{}/{}/{}.pt'.format(args.model, outseq, outname))

            if args.save_flow or args.render_validation:
                for i in range(args.inference_batch_size):
                    _pflow = output[i].data.cpu().numpy().transpose(1, 2, 0)
                    flow_utils.writeFlow( join(flow_folder, '%06d.flo'%(batch_idx * args.inference_batch_size + i)),  _pflow)
                    
                    # You can comment out the plt block in visulize_flow_file() for real-time visualization
                    if args.inference_visualize:
                        flow_utils.visulize_flow_file(
                            join(flow_folder, '%06d.flo' % (batch_idx * args.inference_batch_size + i)),flow_vis_folder)
                   
                            
            progress.set_description('Inference Averages for Epoch {}: '.format(epoch) + tools.format_dictionary_of_losses(loss_labels, np.array(statistics).mean(axis=0)))
            progress.update(1)

            if batch_idx == (args.inference_n_batches - 1):
                break

        progress.close()

        return
Example #8
    def train(args, epoch, start_iteration, data_loader, model, optimizer, logger, is_validate=False, offset=0):
        statistics = []
        total_loss = 0

        if is_validate:
            model.eval()
            title = "Validating Epoch {}".format(epoch)
            args.validation_n_batches = np.inf if args.validation_n_batches < 0 else args.validation_n_batches
            progress = tqdm(
                tools.IteratorTimer(data_loader),
                ncols=100,
                total=np.minimum(len(data_loader), args.validation_n_batches),
                leave=True,
                position=offset,
                desc=title,
            )
        else:
            model.train()
            title = "Training Epoch {}".format(epoch)
            args.train_n_batches = np.inf if args.train_n_batches < 0 else args.train_n_batches
            progress = tqdm(
                tools.IteratorTimer(data_loader),
                ncols=120,
                total=np.minimum(len(data_loader), args.train_n_batches),
                smoothing=0.9,
                miniters=1,
                leave=True,
                position=offset,
                desc=title,
            )

        last_log_time = progress._time()
        for batch_idx, (data, target) in enumerate(progress):

            data, target = [Variable(d) for d in data], [Variable(t) for t in target]
            if args.cuda and args.number_gpus == 1:
                data, target = [d.cuda(non_blocking=True) for d in data], [t.cuda(non_blocking=True) for t in target]

            optimizer.zero_grad() if not is_validate else None
            losses = model(data[0], target[0])
            losses = [torch.mean(loss_value) for loss_value in losses]
            loss_val = losses[0]  # Collect first loss for weight update
            total_loss += loss_val.item()
            loss_values = [v.item() for v in losses]

            # gather loss_labels; a direct return leads to a recursion limit error as it looks for variables to gather
            loss_labels = list(model.module.loss.loss_labels)

            assert not np.isnan(total_loss)

            if not is_validate and args.fp16:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(), args.gradient_clip)

                params = list(model.parameters())
                for i in range(len(params)):
                    param_copy[i].grad = params[i].grad.clone().type_as(params[i]).detach()
                    param_copy[i].grad.mul_(1.0 / args.loss_scale)
                optimizer.step()
                for i in range(len(params)):
                    params[i].data.copy_(param_copy[i].data)

            elif not is_validate:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(), args.gradient_clip)
                optimizer.step()

            # Update hyperparameters if needed
            global_iteration = start_iteration + batch_idx
            if not is_validate:
                tools.update_hyperparameter_schedule(args, epoch, global_iteration, optimizer)
                loss_labels.append("lr")
                loss_values.append(optimizer.param_groups[0]["lr"])

            loss_labels.append("load")
            loss_values.append(progress.iterable.last_duration)

            # Print out statistics
            statistics.append(loss_values)
            title = "{} Epoch {}".format("Validating" if is_validate else "Training", epoch)

            progress.set_description(title + " " + tools.format_dictionary_of_losses(loss_labels, statistics[-1]))

            if (((global_iteration + 1) % args.log_frequency) == 0 and not is_validate) or (
                is_validate and batch_idx == args.validation_n_batches - 1
            ):

                global_iteration = global_iteration if not is_validate else start_iteration

                logger.add_scalar(
                    "batch logs per second", len(statistics) / (progress._time() - last_log_time), global_iteration
                )
                last_log_time = progress._time()

                all_losses = np.array(statistics)

                for i, key in enumerate(loss_labels):
                    logger.add_scalar("average batch " + str(key), all_losses[:, i].mean(), global_iteration)
                    logger.add_histogram(str(key), all_losses[:, i], global_iteration)

            # Reset Summary
            statistics = []

            if is_validate and (batch_idx == args.validation_n_batches):
                break

            if (not is_validate) and (batch_idx == (args.train_n_batches)):
                break

        progress.close()

        return total_loss / float(batch_idx + 1), (batch_idx + 1)
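
A hedged usage sketch for the train function in Example 8 (the variant updated for current PyTorch). The loader construction, the argument fields and the log directory below are assumptions, not taken from the snippet; the original scripts typically log through tensorboardX's SummaryWriter:

    from tensorboardX import SummaryWriter

    logger = SummaryWriter(log_dir='work/train-log')
    for epoch in range(args.start_epoch, args.total_epochs):
        start_iteration = epoch * len(train_loader)
        train_loss, _ = train(args, epoch, start_iteration, train_loader,
                              model, optimizer, logger)
        val_loss, _ = train(args, epoch, start_iteration, validation_loader,
                            model, optimizer, logger, is_validate=True)
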
Example #9
    def inference(args, epoch, data_loader, model, offset=0):
        if os.path.exists(
                join(
                    '/usr/xtmp/ct214/daml/vr_sickness/perspective_skyhouse_of_results/',
                    args.name + ".npy")
        ) and not args.name == "right_eye_theta_0_phi_-82.5":
            print(
                "seen ",
                join(
                    '/usr/xtmp/ct214/daml/vr_sickness/perspective_skyhouse_of_results/',
                    args.name))
            return
        model.eval()
        tosave = np.zeros((1800, 2))
        print("args name is !!!!!!!!!!!!!!!!!!!!!!", args.name)
        if args.save_flow or args.render_validation:
            flow_folder = "{}/inference/{}.epoch-{}-flow-field".format(
                args.save, args.name.replace('/', '.'), epoch)
            if not os.path.exists(flow_folder):
                os.makedirs(flow_folder)

        # visualization folder
        if args.inference_visualize:
            flow_vis_folder = "{}/inference/{}.epoch-{}-flow-vis".format(
                args.save, args.name.replace('/', '.'), epoch)
            if not os.path.exists(flow_vis_folder):
                os.makedirs(flow_vis_folder)

        args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches

        progress = tqdm(data_loader,
                        ncols=100,
                        total=np.minimum(len(data_loader),
                                         args.inference_n_batches),
                        desc='Inferencing ',
                        leave=True,
                        position=offset)

        statistics = []
        total_loss = 0
        for batch_idx, (data, target) in enumerate(progress):
            if args.cuda:
                data, target = [d.cuda(non_blocking=True) for d in data
                                ], [t.cuda(non_blocking=True) for t in target]
            data, target = [Variable(d)
                            for d in data], [Variable(t) for t in target]

            # when ground-truth flows are not available for inference_dataset,
            # the targets are set to all zeros. thus, losses are actually L1 or L2 norms of computed optical flows,
            # depending on the type of loss norm passed in
            with torch.no_grad():
                losses, output = model(data[0], target[0], inference=True)

            losses = [torch.mean(loss_value) for loss_value in losses]
            loss_val = losses[0]  # Collect first loss for weight update
            total_loss += loss_val.item()
            loss_values = [v.item() for v in losses]

            # gather loss_labels; a direct return leads to a recursion limit error as it looks for variables to gather
            loss_labels = list(model.module.loss.loss_labels)

            statistics.append(loss_values)
            # import IPython; IPython.embed()
            if args.save_flow or args.render_validation:
                for i in range(args.inference_batch_size):
                    _pflow = output[i].data.cpu().numpy().transpose(1, 2, 0)
                    avg_pflow = np.average(_pflow, axis=(0, 1))
                    tosave[batch_idx * args.inference_batch_size +
                           i] = avg_pflow
                    # flow_utils.writeFlow( join(flow_folder, '%06d.flo'%(batch_idx * args.inference_batch_size + i)),  _pflow)

            progress.set_description(
                'Inference Averages for Epoch {}: '.format(epoch) +
                tools.format_dictionary_of_losses(
                    loss_labels,
                    np.array(statistics).mean(axis=0)))
            progress.update(1)

            if batch_idx == (args.inference_n_batches - 1):
                break

        progress.close()
        np.save(
            join(
                '/usr/xtmp/ct214/daml/vr_sickness/perspective_skyhouse_of_results/',
                args.name), tosave)
        return
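
Example 9 accumulates the spatially averaged flow of each frame into `tosave` (shape `(1800, 2)`) and writes it with `np.save`. Reading it back is then simply the following; `results_dir` is shorthand for the hard-coded output folder above:

    avg_flow = np.load(join(results_dir, args.name + '.npy'))
    mean_u, mean_v = avg_flow[:, 0], avg_flow[:, 1]  # per-frame average horizontal / vertical flow
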
Example #10
    def inference(args, epoch, data_loader, model, offset=0):

        model.eval()

        if args.save_flow or args.render_validation:
            flow_folder = "{}/inference/{}.epoch-{}-flow-field".format(
                args.save, args.name.replace('/', '.'), epoch)
            if not os.path.exists(flow_folder):
                os.makedirs(flow_folder)

        # visualization folder
        if args.inference_visualize:
            flow_vis_folder = "{}/inference/{}.epoch-{}-flow-vis".format(
                args.save, args.name.replace('/', '.'), epoch)
            if not os.path.exists(flow_vis_folder):
                os.makedirs(flow_vis_folder)

        if args.save_frames or args.save_inferenceLog:
            inference_folder = "{}/{}.epoch-{}".format(
                args.save, args.name.replace('/', '.'), epoch)
            if not os.path.exists(inference_folder):
                os.makedirs(inference_folder)

        args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches

        progress = tqdm(data_loader,
                        ncols=100,
                        total=np.minimum(len(data_loader),
                                         args.inference_n_batches),
                        desc='Inferencing ',
                        leave=True,
                        position=offset)

        print('[LOG] We assume that "inference_batch_size" arg is always 1')
        if data_loader.dataset.ref_names is None:
            f_names = [f'{f_idx:06d}.png' for f_idx in range(len(data_loader))]
        else:
            f_names = data_loader.dataset.ref_names

        if args.save_inferenceLog:
            log_labels = ['filename'] + list(model.module.loss.loss_labels)
            log_dict = {l: {} for l in log_labels}
            for i in range(len(data_loader)):
                log_dict['filename'][i] = f_names[i]

        statistics = []
        total_loss = 0
        for batch_idx, (data, target) in enumerate(progress):
            if args.cuda:
                data, target = [d.cuda(non_blocking=True) for d in data
                                ], [t.cuda(non_blocking=True) for t in target]
            data, target = [Variable(d)
                            for d in data], [Variable(t) for t in target]

            # when ground-truth flows are not available for inference_dataset,
            # the targets are set to all zeros. thus, losses are actually L1 or L2 norms of computed optical flows,
            # depending on the type of loss norm passed in
            with torch.no_grad():
                pred_losses, output = model(data[0], target[0], inference=True)

            losses = [torch.mean(loss_value) for loss_value in pred_losses]
            loss_val = losses[0]  # Collect first loss for weight update
            total_loss += loss_val.item()
            loss_values = [v.item() for v in losses]

            # gather loss_labels; a direct return leads to a recursion limit error as it looks for variables to gather
            loss_labels = list(model.module.loss.loss_labels)

            statistics.append(loss_values)
            # import IPython; IPython.embed()
            if args.save_flow or args.render_validation:
                for i in range(args.inference_batch_size):
                    _pflow = output[i].data.cpu().numpy().transpose(1, 2, 0)
                    flow_utils.writeFlow(
                        join(
                            flow_folder, '%06d.flo' %
                            (batch_idx * args.inference_batch_size + i)),
                        _pflow)

                    # You can comment out the plt block in visulize_flow_file() for real-time visualization
                    if args.inference_visualize:
                        flow_utils.visulize_flow_file(
                            join(
                                flow_folder, '%06d.flo' %
                                (batch_idx * args.inference_batch_size + i)),
                            flow_vis_folder)

            if args.save_frames:
                from PIL import Image
                _pframe = output[0].data.cpu().numpy().transpose(1, 2, 0)
                _pframe = (_pframe).clip(min=0, max=255).astype(np.uint8)
                f_name = f_names[batch_idx]
                png_data = Image.fromarray(_pframe)
                png_data.save(f'{inference_folder}/{f_name}')

            if args.save_inferenceLog:
                for label, loss in zip(loss_labels, pred_losses):
                    log_dict[label][batch_idx] = str(loss.cpu().numpy())

            progress.set_description(
                'Inference Averages for Epoch {}: '.format(epoch) +
                tools.format_dictionary_of_losses(
                    loss_labels,
                    np.array(statistics).mean(axis=0)))
            progress.update(1)

            if batch_idx == (args.inference_n_batches - 1):
                break

        progress.close()
        if args.save_inferenceLog:
            import json
            with open(f'{inference_folder}/log.json', 'w') as fp:
                json.dump(log_dict, fp, sort_keys=True, indent=4)
        return
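
When `args.save_inferenceLog` is set, Example 10 dumps `log_dict` as JSON with one sub-dictionary per loss label, keyed by batch index. A sketch of reading it back (note that `json.dump` turns the integer batch indices into string keys):

    import json

    with open(f'{inference_folder}/log.json') as fp:
        log = json.load(fp)
    print(log['filename']['0'])   # e.g. '000000.png'
    # per-batch loss values are stored as strings, one entry per loss label:
    # log[<loss_label>]['0'] -> str(loss tensor for batch 0)
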
Example #11
    def inference(args, epoch, data_loader, model, offset=0):

        model.eval()

        if args.save_flow or args.render_validation:
            flow_folder = "{}/inference/{}.epoch-{}-flow-field".format(
                args.save, args.name.replace('/', '.'), epoch)
            if not os.path.exists(flow_folder):
                os.makedirs(flow_folder)

        # visualization folder
        if args.inference_visualize:
            flow_vis_folder = "{}/inference/{}.epoch-{}-flow-vis".format(
                args.save, args.name.replace('/', '.'), epoch)
            if not os.path.exists(flow_vis_folder):
                os.makedirs(flow_vis_folder)

        args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches

        progress = tqdm(data_loader,
                        ncols=200,
                        total=np.minimum(len(data_loader),
                                         args.inference_n_batches),
                        desc='Inferencing ',
                        leave=True,
                        position=offset)

        statistics = []
        total_loss = 0
        for batch_idx, (data, target) in enumerate(progress):
            if args.cuda:
                data, target = [d.cuda(non_blocking=True) for d in data
                                ], [t.cuda(non_blocking=True) for t in target]
            data, target = [Variable(d)
                            for d in data], [Variable(t) for t in target]

            # when ground-truth flows are not available for inference_dataset,
            # the targets are set to all zeros. thus, losses are actually L1 or L2 norms of computed optical flows,
            # depending on the type of loss norm passed in
            with torch.no_grad():
                losses, output = model(data[0], target[0], inference=True)

            losses = [torch.mean(loss_value) for loss_value in losses]
            loss_val = losses[0]  # Collect first loss for weight update
            total_loss += loss_val.item()
            loss_values = [v.item() for v in losses]

            # gather loss_labels; a direct return leads to a recursion limit error as it looks for variables to gather
            loss_labels = list(model.module.loss.loss_labels)

            statistics.append(loss_values)
            # import IPython; IPython.embed()
            if args.save_flow or args.render_validation:
                for i in range(args.inference_batch_size):
                    _pflow_all = output[i].data.cpu().numpy().transpose(
                        1, 2, 0)
                    _tflow_all = target[0][i].data.cpu().numpy()
                    if len(_tflow_all.shape) == 4:
                        _tflow_all = _tflow_all.transpose(1, 2, 3, 0)
                    elif len(_tflow_all.shape) == 3:
                        _tflow_all = _tflow_all.transpose(1, 2, 0)
                    else:
                        raise ValueError('Unsupported dimensions of _tflow_all')

                    for j in range(0, output.shape[1], 2):
                        _pflow = _pflow_all[:, :, j:j + 2]

                        if len(_tflow_all.shape) == 4:
                            _tflow = _tflow_all[int(j / 2), :, :, :]
                        elif len(_tflow_all.shape) == 3:
                            _tflow = _tflow_all
                        else:
                            raise ValueError('Unsupported dimensions of _tflow_all')

                        flow_filename_base = '%06d_%06d' % (
                            batch_idx * args.inference_batch_size + i,
                            int(j / 2))

                        flow_utils.writeFlow(
                            join(flow_folder, flow_filename_base) + '.flo',
                            _pflow)

                        # You can comment out the plt block in visulize_flow_file() for real-time visualization
                        # if args.inference_visualize:
                        #     flow_utils.visulize_flow_file(
                        #         join(flow_folder, '%06d.flo' % (batch_idx * args.inference_batch_size + i)),flow_vis_folder)

                        flow_utils.writeFlow(
                            join(flow_folder,
                                 flow_filename_base + '_target.flo'), _tflow)

                        # You can comment out the plt block in visulize_flow_file() for real-time visualization
                        if args.inference_visualize:
                            # flow_utils.visulize_flow_file_and_target(
                            #     join(flow_folder, '%06d.flo' % (batch_idx * args.inference_batch_size + i)),
                            #     join(flow_folder, '%06d_target.flo' % (batch_idx * args.inference_batch_size + i)),
                            #     flow_vis_folder)

                            results_image = visualize_results(
                                _pflow, _tflow, data[0][i])
                            cv2.imwrite(
                                join(flow_vis_folder,
                                     flow_filename_base + '_vis.png'),
                                cv2.cvtColor(results_image, cv2.COLOR_RGB2BGR))

            progress.set_description(
                'Inference Averages for Epoch {}: '.format(epoch) +
                tools.format_dictionary_of_losses(
                    loss_labels,
                    np.array(statistics).mean(axis=0)))
            progress.update(1)

            if batch_idx == (args.inference_n_batches - 1):
                break

        progress.close()

        return
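
Example 11 writes both the predicted and the target flow as `.flo` files. A minimal reader sketch for inspecting them afterwards, matching the writer layout sketched after Example 2 (again an illustration, not the repository's `flow_utils` code):

    import numpy as np

    def read_flo(filename):
        """Read an HxWx2 float32 flow field from a Middlebury .flo file."""
        with open(filename, 'rb') as f:
            magic = np.fromfile(f, np.float32, count=1)[0]
            assert magic == 202021.25, 'invalid .flo magic number'
            w = int(np.fromfile(f, np.int32, count=1)[0])
            h = int(np.fromfile(f, np.int32, count=1)[0])
            return np.fromfile(f, np.float32, count=2 * w * h).reshape(h, w, 2)
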
Example #12
    def train(args,
              epoch,
              start_iteration,
              data_loader,
              model,
              optimizer,
              logger,
              is_validate=False,
              offset=0):
        statistics = []
        all_gradient_norms = []
        total_loss = 0

        if is_validate:
            model.eval()
            title = 'Validating Epoch {}'.format(epoch)
            args.validation_n_batches = np.inf if args.validation_n_batches < 0 else args.validation_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=200,
                            total=np.minimum(len(data_loader),
                                             args.validation_n_batches),
                            leave=True,
                            position=offset,
                            desc=title)
        else:
            model.train()
            title = 'Training Epoch {}'.format(epoch)
            args.train_n_batches = np.inf if args.train_n_batches < 0 else args.train_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=200,
                            total=np.minimum(len(data_loader),
                                             args.train_n_batches),
                            smoothing=.9,
                            miniters=1,
                            leave=True,
                            position=offset,
                            desc=title)

        last_log_time = progress._time()
        for batch_idx, (data, target) in enumerate(progress):

            data, target = [Variable(d)
                            for d in data], [Variable(t) for t in target]
            if args.cuda and args.number_gpus == 1:
                data, target = [d.cuda(non_blocking=True) for d in data
                                ], [t.cuda(non_blocking=True) for t in target]

            optimizer.zero_grad() if not is_validate else None

            losses, flow = model(data[0], target[0])
            #print('Losses shape {} {}'.format(losses[0].shape, losses[1].shape))

            losses = [torch.mean(loss_value) for loss_value in losses]
            loss_val = losses[0]  # Collect first loss for weight update
            total_loss += loss_val.item()
            loss_values = [v.item() for v in losses]
            loss_labels = list(model.module.loss.loss_labels)

            assert not np.isnan(total_loss)

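            # fp16 path: gradients from the half-precision model are copied into
            # `param_copy` (an fp32 master copy of the parameters, expected to be
            # maintained by the caller), unscaled by `loss_scale`, stepped, and
            # the updated fp32 weights are copied back into the model.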
            if not is_validate and args.fp16:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)

                params = list(model.parameters())
                for i in range(len(params)):
                    param_copy[i].grad = params[i].grad.clone().type_as(
                        params[i]).detach()
                    param_copy[i].grad.mul_(1. / args.loss_scale)
                optimizer.step()
                for i in range(len(params)):
                    params[i].data.copy_(param_copy[i].data)

            elif not is_validate:
                loss_val.backward()
                if args.gradient_clip:
                    gradient_norm = torch.nn.utils.clip_grad_norm(
                        model.parameters(), args.gradient_clip)
                    all_gradient_norms.append(gradient_norm)

                optimizer.step()

            # Update hyperparameters if needed
            global_iteration = start_iteration + batch_idx
            if not is_validate:
                tools.update_hyperparameter_schedule(args, epoch,
                                                     global_iteration,
                                                     optimizer)
                loss_labels.append('lr')
                loss_values.append(optimizer.param_groups[0]['lr'])

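            # 'load' tracks the duration of the last data fetch, as reported by
            # the tools.IteratorTimer wrapping the data loader.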
            loss_labels.append('load')
            loss_values.append(progress.iterable.last_duration)

            # Print out statistics
            statistics.append(loss_values)
            title = '{} Epoch {}'.format(
                'Validating' if is_validate else 'Training', epoch)

            progress.set_description(
                title + ' ' +
                tools.format_dictionary_of_losses(loss_labels, statistics[-1]))

            if ((((global_iteration + 1) % args.log_frequency) == 0
                 and not is_validate) or
                (is_validate and batch_idx == args.validation_n_batches - 1)):

                global_iteration = global_iteration if not is_validate else start_iteration

                logger.add_scalar(
                    'batch logs per second',
                    len(statistics) / (progress._time() - last_log_time),
                    global_iteration)
                last_log_time = progress._time()

                all_losses = np.array(statistics)

                for i, key in enumerate(loss_labels):
                    logger.add_scalar('average batch ' + str(key),
                                      all_losses[:,
                                                 i].mean(), global_iteration)
                    logger.add_histogram(str(key), all_losses[:, i],
                                         global_iteration)

                if args.gradient_clip:
                    logger.add_scalar('average batch gradient_norm',
                                      np.array(all_gradient_norms).mean(),
                                      global_iteration)
                    all_gradient_norms = []

                # Returns multiscale flow, get largest scale and first element in batch
                if args.multiframe or args.multiframe_two_output:
                    flow = flow_utils.flow_postprocess(flow)[0][0]

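                    # `flow` stacks len(args.frame_weights) two-channel flows
                    # along the last axis; split them into (u, v) pairs and
                    # upsample 4x for visualization.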
                    num_flows = len(args.frame_weights)
                    flows_scaled = [
                        cv2.resize(flow[:, :, i:i + 2], None, fx=4.0, fy=4.0)
                        for i in range(0, 2 * num_flows, 2)
                    ]

                    target = target[0].detach().cpu().numpy()
                    target_flow = np.transpose(target[0], (1, 2, 3, 0))

                    results_images = [
                        visualize_results(flows_scaled[i], target_flow[i],
                                          data[0][0] if i == 0 else None)
                        for i in range(0, num_flows)
                    ]

                    for i in range(0, num_flows):
                        logger.add_image('flow{} and target'.format(i),
                                         ToTensor()(results_images[i]),
                                         global_iteration)

                else:
                    flow = flow_utils.flow_postprocess(flow)[0][0]
                    flow_scaled = cv2.resize(flow, None, fx=4.0, fy=4.0)
                    target_flow = flow_utils.flow_postprocess(target)[0][0]
                    results_image = visualize_results(flow_scaled, target_flow,
                                                      data[0][0])
                    logger.add_image('flow and target',
                                     ToTensor()(results_image),
                                     global_iteration)

                # logger.add_histogram('flow_values', flow[0], global_iteration)

            # Reset Summary
            statistics = []

            if (is_validate and (batch_idx == args.validation_n_batches)):
                break

            if ((not is_validate) and (batch_idx == (args.train_n_batches))):
                break

        progress.close()

        return total_loss / float(batch_idx + 1), (batch_idx + 1)
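A hedged sketch of how a train function with this signature is typically driven over epochs; train_loader, validation_loader, model, optimizer, logger and args are placeholders rather than objects defined in the example above:

    # Hypothetical driver loop; everything except train() is a placeholder.
    best_validation_loss = float('inf')
    global_iteration = 0
    for epoch in range(args.total_epochs):
        train_loss, train_batches = train(args, epoch, global_iteration,
                                          train_loader, model, optimizer,
                                          logger, is_validate=False)
        global_iteration += train_batches
        validation_loss, _ = train(args, epoch, global_iteration,
                                   validation_loader, model, optimizer,
                                   logger, is_validate=True)
        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            torch.save(model.state_dict(), 'best_model.pth')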
Example No. 13
    def train(args,
              epoch,
              start_iteration,
              data_loader,
              model,
              optimizer,
              logger,
              is_validate=False,
              offset=0):
        statistics = []
        total_loss = 0

        if is_validate:
            model.eval()
            title = 'Validating Epoch {}'.format(epoch)
            #print("validation_n_batches", args.validation_n_batches)
            args.validation_n_batches = np.inf if args.validation_n_batches < 0 else args.validation_n_batches
            #print("validation_n_batches", args.validation_n_batches)
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=100,
                            total=np.minimum(len(data_loader),
                                             args.validation_n_batches),
                            leave=True,
                            position=offset,
                            desc=title)
        else:
            model.train()
            title = 'Training Epoch {}'.format(epoch)
            args.train_n_batches = np.inf if args.train_n_batches < 0 else args.train_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=120,
                            total=np.minimum(len(data_loader),
                                             args.train_n_batches),
                            smoothing=.9,
                            miniters=1,
                            leave=True,
                            position=offset,
                            desc=title)

        last_log_time = progress._time()
        for batch_idx, (data, target) in enumerate(progress):

            data, target = [Variable(d)
                            for d in data], [Variable(t) for t in target]
            if args.cuda and args.number_gpus == 1:
                # `async` is a reserved keyword in Python 3.7+, so use non_blocking.
                data, target = [d.cuda(non_blocking=True) for d in data
                                ], [t.cuda(non_blocking=True) for t in target]

            optimizer.zero_grad() if not is_validate else None
            #print("this is data type",data[0].type())
            #print("\n")
            #print("this is target type",target[0].type())
            #print("\n")
            losses = model(data[0], target[0])
            losses = [torch.mean(loss_value)
                      for loss_value in losses]  # mean over the batch
            loss_val = losses[0]  # first loss drives the weight update; the second is the EPE
            total_loss += loss_val.data.cpu()
            loss_values = [v.data.cpu() for v in losses]  # collect loss values

            # Gather loss_labels; returning them directly leads to a recursion-limit error as it looks for variables to gather
            #loss_labels = [y for x in model.module.loss.loss_labels for y in x] #list(model.module.loss.loss_labels)
            loss_labels = list(model.module.loss.loss_labels)

            assert not np.isnan(total_loss.cpu())

            if not is_validate and args.fp16:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)

                params = list(model.parameters())
                for i in range(len(params)):
                    param_copy[i].grad = params[i].grad.clone().type_as(
                        params[i]).detach()
                    param_copy[i].grad.mul_(1. / args.loss_scale)
                optimizer.step()
                for i in range(len(params)):
                    params[i].data.copy_(param_copy[i].data)

            elif not is_validate:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)
                optimizer.step()

            # Update hyperparameters if needed
            global_iteration = start_iteration + batch_idx
            if not is_validate:
                tools.update_hyperparameter_schedule(args, epoch,
                                                     global_iteration,
                                                     optimizer)
                loss_labels.append('lr')
                loss_values.append(optimizer.param_groups[0]['lr'])

            loss_labels.append('load')
            loss_values.append(progress.iterable.last_duration)  #add load

            # Print out statistics
            statistics.append(loss_values)
            title = '{} Epoch {}'.format(
                'Validating' if is_validate else 'Training', epoch)

            progress.set_description(
                title + ' ' +
                tools.format_dictionary_of_losses(loss_labels, statistics[-1]))

            # args.log_frequency == 1 by default
            if ((((global_iteration + 1) % args.log_frequency) == 0
                 and not is_validate)
                    or (is_validate
                        and batch_idx == min(args.validation_n_batches,
                                             len(data_loader) - 1))):

                global_iteration = global_iteration if not is_validate else start_iteration

                logger.add_scalar(
                    'batch logs per second',
                    len(statistics) / (progress._time() - last_log_time),
                    global_iteration)
                last_log_time = progress._time()

                all_losses = np.array(statistics)

                for i, key in enumerate(loss_labels):
                    logger.add_scalar('average batch ' + str(key),
                                      all_losses[:,
                                                 i].mean(), global_iteration)
                    logger.add_histogram(str(key), all_losses[:, i],
                                         global_iteration)

                # Reset Summary
                statistics = []

            if (is_validate and (batch_idx == args.validation_n_batches)):
                break

            if ((not is_validate) and (batch_idx == (args.train_n_batches))):
                break

        progress.close()

        return total_loss / float(batch_idx + 1), (batch_idx + 1)
Example No. 14
    def train(args,
              epoch,
              start_iteration,
              data_loader,
              model,
              optimizer,
              logger,
              is_validate=False,
              offset=0):
        #print(str(model))
        statistics = []
        total_loss = 0
        debug = False
        if is_validate:
            model.eval()
            title = 'Validating Epoch {}'.format(epoch)
            args.validation_n_batches = np.inf if args.validation_n_batches < 0 else args.validation_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=100,
                            total=np.minimum(len(data_loader),
                                             args.validation_n_batches),
                            leave=True,
                            position=offset,
                            desc=title)
        else:
            model.train()
            title = 'Training Epoch {}'.format(epoch)
            args.train_n_batches = np.inf if args.train_n_batches < 0 else args.train_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=120,
                            total=np.minimum(len(data_loader),
                                             args.train_n_batches),
                            smoothing=.9,
                            miniters=1,
                            leave=True,
                            position=offset,
                            desc=title)

        last_log_time = progress._time()

        for batch_idx, (data, target, cdm) in enumerate(progress):
            data, target, cdm = [
                Variable(d, volatile=is_validate) for d in data
            ], [Variable(t, volatile=is_validate) for t in target
                ], [Variable(q, volatile=is_validate) for q in cdm]

            if args.cuda and args.number_gpus == 1:
                data, target, cdm = [d.cuda(async=True) for d in data
                                     ], [t.cuda(async=True) for t in target
                                         ], [q.cuda(async=True) for q in cdm]

            if debug:
                print(
                    '****************************************************************'
                )
                print('data_0')
                print(data[0])
                print('target_0')
                print(target[0])
                print('cdm')
                print(type(cdm))
                temp1 = cdm[0].data.cpu().numpy()
                print(np.max(temp1))
                print(temp1.shape)
                print(
                    '****************************************************************'
                )

            optimizer.zero_grad() if not is_validate else None
            losses = model(data[0], target[0])
            losses = [torch.mean(loss_value) for loss_value in losses]

            loss_val = losses[0]  # Collect first loss for weight update

            total_loss += loss_val.data[0]
            loss_values = [v.data[0] for v in losses]

            # Gather loss_labels; returning them directly leads to a recursion-limit error as it looks for variables to gather
            loss_labels = list(model.module.loss.loss_labels)

            assert not np.isnan(total_loss)

            if not is_validate and args.fp16:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)

                params = list(model.parameters())
                for i in range(len(params)):
                    param_copy[i].grad = params[i].grad.clone().type_as(
                        params[i]).detach()
                    param_copy[i].grad.mul_(1. / args.loss_scale)
                optimizer.step()
                for i in range(len(params)):
                    params[i].data.copy_(param_copy[i].data)

            elif not is_validate:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)
                optimizer.step()

            # Update hyperparameters if needed
            global_iteration = start_iteration + batch_idx
            if not is_validate:
                tools.update_hyperparameter_schedule(args, epoch,
                                                     global_iteration,
                                                     optimizer)
                loss_labels.append('lr')
                loss_values.append(optimizer.param_groups[0]['lr'])

            loss_labels.append('load')
            loss_values.append(progress.iterable.last_duration)

            # Print out statistics
            statistics.append(loss_values)
            title = '{} Epoch {}'.format(
                'Validating' if is_validate else 'Training', epoch)

            progress.set_description(
                title + ' ' +
                tools.format_dictionary_of_losses(loss_labels, statistics[-1]))

            if ((((global_iteration + 1) % args.log_frequency) == 0
                 and not is_validate) or
                (is_validate and batch_idx == args.validation_n_batches - 1)):

                global_iteration = global_iteration if not is_validate else start_iteration

                logger.add_scalar(
                    'batch logs per second',
                    len(statistics) / (progress._time() - last_log_time),
                    global_iteration)
                last_log_time = progress._time()

                all_losses = np.array(statistics)

                for i, key in enumerate(loss_labels):
                    logger.add_scalar('average batch ' + str(key),
                                      all_losses[:,
                                                 i].mean(), global_iteration)
                    logger.add_histogram(str(key), all_losses[:, i],
                                         global_iteration)

            # Reset Summary
            statistics = []

            if (is_validate and (batch_idx == args.validation_n_batches)):
                break

            if ((not is_validate) and (batch_idx == (args.train_n_batches))):
                break

        progress.close()

        return total_loss / float(batch_idx + 1), (batch_idx + 1)
Example No. 15
    def train(args,
              epoch,
              start_iteration,
              data_loader,
              model,
              optimizer,
              scheduler,
              logger,
              is_validate=False,
              offset=0,
              max_flows_to_show=8):
        running_statistics = None  # Initialize below when the first losses are collected
        all_losses = None  # Initialize below when the first losses are collected
        total_loss = 0

        if is_validate:
            model.eval()
            title = 'Validating Epoch {}'.format(epoch)
            args.validation_n_batches = np.inf if args.validation_n_batches < 0 else args.validation_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=100,
                            total=np.minimum(len(data_loader),
                                             args.validation_n_batches),
                            leave=True,
                            position=offset,
                            desc=title)
        else:
            model.train()
            title = 'Training Epoch {}'.format(epoch)
            args.train_n_batches = np.inf if args.train_n_batches < 0 else args.train_n_batches
            progress = tqdm(tools.IteratorTimer(data_loader),
                            ncols=120,
                            total=np.minimum(len(data_loader),
                                             args.train_n_batches),
                            smoothing=.9,
                            miniters=1,
                            leave=True,
                            position=offset,
                            desc=title)

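        # For each (target, predicted) flow pair this emits four images: the two
        # color-coded flows, a per-pixel end-point-error map normalized by its
        # maximum, and an EPE map saturated at 5 px (scaled by 51 so 5 px -> 255).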
        def convert_flow_to_image(flow_converter, flows_viz):
            imgs = []
            for flow_pair in flows_viz:
                for flow in flow_pair:
                    flow = flow.numpy().transpose((1, 2, 0))
                    img = flow_converter._flowToColor(flow)
                    imgs.append(torch.from_numpy(img.transpose((2, 0, 1))))
                epe_img = torch.sqrt(
                    torch.sum(torch.pow(flow_pair[0] - flow_pair[1], 2),
                              dim=0))
                max_epe = torch.max(epe_img)
                if max_epe == 0:
                    max_epe = torch.ones(1)
                normalized_epe_img = epe_img / max_epe
                normalized_epe_img = (255 * normalized_epe_img).type(
                    torch.uint8)
                normalized_epe_img = torch.stack(
                    (normalized_epe_img, normalized_epe_img,
                     normalized_epe_img),
                    dim=0)
                imgs.append(normalized_epe_img)

                saturated_epe_img = torch.min(epe_img,
                                              5.0 * torch.ones_like(epe_img))
                saturated_epe_img = (51 * saturated_epe_img).type(torch.uint8)
                saturated_epe_img = torch.stack(
                    (saturated_epe_img, saturated_epe_img, saturated_epe_img),
                    dim=0)
                imgs.append(saturated_epe_img)
            return imgs

        max_iters = min(len(data_loader),
                        (args.validation_n_batches if
                         (is_validate and args.validation_n_batches > 0) else
                         len(data_loader)),
                        (args.train_n_batches if
                         (not is_validate
                          and args.train_n_batches > 0) else len(data_loader)))

        if is_validate:
            flow_converter = f2i.Flow()
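            # Sample flow pairs at an even interval so that at most
            # max_flows_to_show of them end up in the logged image grid.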
            collect_flow_interval = int(
                np.ceil(float(max_iters) / max_flows_to_show))
            flows_viz = []

        last_log_batch_idx = 0
        last_log_time = progress._time()
        for batch_idx, (data, target) in enumerate(progress):
            global_iteration = start_iteration + batch_idx

            data, target = [Variable(d)
                            for d in data], [Variable(t) for t in target]
            if args.cuda and args.number_gpus == 1:
                data, target = [d.cuda()
                                for d in data], [t.cuda() for t in target]

            optimizer.zero_grad() if not is_validate else None
            losses, output = model(data[0], target[0], inference=True)
            losses = [torch.mean(loss_value) for loss_value in losses]
            loss_val = losses[0]  # Collect first loss for weight update
            total_loss += loss_val.item()
            loss_values = [v.item() for v in losses]

            if is_validate and batch_idx % collect_flow_interval == 0:
                flows_viz.append(
                    (target[0][0].detach().cpu(), output[0].detach().cpu()))

            if is_validate and args.validation_log_images and batch_idx == (
                    max_iters - 1):
                imgs = convert_flow_to_image(flow_converter, flows_viz)
                imgs = torchvision_utils.make_grid(imgs,
                                                   nrow=4,
                                                   normalize=False,
                                                   scale_each=False)
                logger.add_image('target/predicted flows', imgs,
                                 global_iteration)

            # Gather loss_labels; returning them directly leads to a recursion-limit error as it looks for variables to gather
            loss_labels = list(model.module.loss.loss_labels)

            assert not np.isnan(total_loss)

            if not is_validate and args.fp16:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)

                params = list(model.parameters())
                for i in range(len(params)):
                    param_copy[i].grad = params[i].grad.clone().type_as(
                        params[i]).detach()
                    param_copy[i].grad.mul_(1. / args.loss_scale)
                optimizer.step()
                for i in range(len(params)):
                    params[i].data.copy_(param_copy[i].data)

            elif not is_validate:
                loss_val.backward()
                if args.gradient_clip:
                    torch.nn.utils.clip_grad_norm(model.parameters(),
                                                  args.gradient_clip)
                optimizer.step()

            # Update hyperparameters if needed
            if not is_validate:
                scheduler.step()
                loss_labels.append('lr')
                loss_values.append(optimizer.param_groups[0]['lr'])

            loss_labels.append('load')
            loss_values.append(progress.iterable.last_duration)

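            # A running sum of the per-batch losses drives the progress-bar
            # averages; `all_losses` keeps the full per-batch history for the
            # scalar/histogram logging below.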
            if running_statistics is None:
                running_statistics = np.array(loss_values)
                all_losses = np.zeros((len(data_loader), len(loss_values)),
                                      np.float32)
            else:
                running_statistics += np.array(loss_values)
            all_losses[batch_idx] = loss_values.copy()
            title = '{} Epoch {}'.format(
                'Validating' if is_validate else 'Training', epoch)

            progress.set_description(title + ' ' +
                                     tools.format_dictionary_of_losses(
                                         loss_labels, running_statistics /
                                         (batch_idx + 1)))

            if ((((global_iteration + 1) % args.log_frequency) == 0
                 and not is_validate) or (batch_idx == max_iters - 1)):

                global_iteration = global_iteration if not is_validate else start_iteration

                logger.add_scalar('batch logs per second',
                                  (batch_idx - last_log_batch_idx) /
                                  (progress._time() - last_log_time),
                                  global_iteration)
                last_log_time = progress._time()
                last_log_batch_idx = batch_idx

                for i, key in enumerate(loss_labels):
                    logger.add_scalar('average batch ' + str(key),
                                      all_losses[:batch_idx + 1,
                                                 i].mean(), global_iteration)
                    logger.add_histogram(str(key), all_losses[:batch_idx + 1,
                                                              i],
                                         global_iteration)

            if (is_validate and (batch_idx == args.validation_n_batches)):
                break

            if ((not is_validate) and (batch_idx == (args.train_n_batches))):
                break

        progress.close()

        return total_loss / float(batch_idx + 1), (batch_idx + 1)