Example #1
    def __init__(self,
                 train=True,
                 path='./TRANCOS_v3',
                 out_shape=(120, 176),
                 transform=NP_T.ToTensor(),
                 gamma=30,
                 max_len=None,
                 cameras=None):
        r"""
        Args:
            train: train (`True`) or test (`False`) images (default: `True`).
            path: path for the dataset (default: "./TRANCOS_v3").
            out_shape: shape of the output images (default: (120, 176)).
            transform: transformations to apply to the images as np.arrays (default: `NP_T.ToTensor()`).
            gamma: precision parameter of the Gaussian kernel (default: 30).
            max_len: maximum sequence length (default: `None`).
            cameras: list with the camera IDs to be used, so that images from other cameras are discarded;
                if `None`, all cameras are used (default: `None`).
        """
        super(TrancosSeq, self).__init__(train=train,
                                         path=path,
                                         out_shape=out_shape,
                                         transform=transform,
                                         gamma=gamma,
                                         get_cameras=True,
                                         cameras=cameras)

        # hash table from file names to indices
        self.img2idx = {img: idx for idx, img in enumerate(self.image_files)}
        # list of lists containing the names of the images in each sequence
        self.seqs = []
        prev_cid = -1
        cur_len = 0
        with open(os.path.join(self.path, 'images',
                               'cam_annotations.txt')) as f:
            for line in f:
                img_f, cid = line.split()
                if img_f in self.image_files:
                    # all images in the sequence must be from the same camera
                    # and all sequences must have length not greater than max_len
                    if (int(cid) == prev_cid) and ((max_len is None) or
                                                   (cur_len < max_len)):
                        self.seqs[-1].append(img_f)
                        cur_len += 1
                    else:
                        self.seqs.append([img_f])
                        cur_len = 1
                        prev_cid = int(cid)

        if max_len is None:
            # maximum sequence length in the dataset
            self.max_len = max(len(seq) for seq in self.seqs)
        else:
            self.max_len = max_len
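
The camera-change logic above is easier to check in isolation. Here is a minimal, self-contained sketch of the same grouping rule (the `group_by_camera` helper is hypothetical, written for illustration only): it partitions an ordered stream of `(image, camera_id)` pairs into sequences, starting a new sequence whenever the camera changes or the current sequence reaches `max_len`.

def group_by_camera(annotations, max_len=None):
    """Partition an ordered list of (image, camera_id) pairs into sequences."""
    seqs = []
    prev_cid = None
    cur_len = 0
    for img_f, cid in annotations:
        # same rule as TrancosSeq: all images in a sequence come from the same
        # camera, and no sequence grows beyond max_len (if given)
        if cid == prev_cid and (max_len is None or cur_len < max_len):
            seqs[-1].append(img_f)
            cur_len += 1
        else:
            seqs.append([img_f])
            cur_len = 1
            prev_cid = cid
    return seqs

# two cameras, max_len=2: camera 1 is split after two frames
assert group_by_camera([('a', 1), ('b', 1), ('c', 1), ('d', 2)], max_len=2) == \
       [['a', 'b'], ['c'], ['d']]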
Example #2
    def __init__(self,
                 path='./citycam/preprocessed',
                 out_shape=(120, 176),
                 transform=NP_T.ToTensor(),
                 gamma=30,
                 max_len=None,
                 cameras=None,
                 load_all=True):
        r"""
        Args:
            path: path for the dataset (default: "./citycam/preprocessed").
            out_shape: shape of the output images (default: (120, 176)).
            transform: transformations to apply to the images as np.arrays (default: `NP_T.ToTensor()`).
            gamma: precision parameter of the Gaussian kernel (default: 30).
            max_len: maximum sequence length (default: `None`).
            cameras: list with the camera IDs to be used, so that images from other cameras are discarded;
                if `None`, all cameras are used (default: `None`).
            load_all: passed through to the base class (default: `True`).
        """
        super(WebcamTSeq, self).__init__(path=path,
                                         out_shape=out_shape,
                                         transform=transform,
                                         gamma=gamma,
                                         get_cameras=True,
                                         cameras=cameras,
                                         load_all=load_all)

        # hash table from file names to indices
        self.img2idx = {img: idx for idx, img in enumerate(self.image_files)}
        self.seqs = []
        for i, img_f in enumerate(self.image_files):
            seq_id = img_f.split(os.sep)[0]  # sequence ID is the top-level directory
            if i == 0:
                self.seqs.append([img_f])
                prev_seq_id = seq_id
                continue

            if (seq_id == prev_seq_id) and ((max_len is None) or
                                            (i % max_len > 0)):
                self.seqs[-1].append(img_f)
            else:
                self.seqs.append([img_f])
            prev_seq_id = seq_id

        if max_len is None:
            # maximum sequence length in the dataset
            self.max_len = max(len(seq) for seq in self.seqs)
        else:
            self.max_len = max_len
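
Both sequence datasets expose `self.max_len`, which downstream code can use to batch variable-length sequences into fixed-size tensors. Below is a hedged sketch of one way to do that padding (the `pad_seq` helper and the zero-padding convention are assumptions for illustration, not part of the original classes):

import torch

def pad_seq(frames, max_len):
    """Stack a list of C x H x W frame tensors into a max_len x C x H x W
    tensor, zero-padded at the end; also return the true length for masking."""
    seq = torch.zeros(max_len, *frames[0].shape)
    seq[:len(frames)] = torch.stack(frames)
    return seq, len(frames)

# three 3x120x176 frames padded to a sequence of length 5
frames = [torch.randn(3, 120, 176) for _ in range(3)]
seq, length = pad_seq(frames, max_len=5)
assert seq.shape == (5, 3, 120, 176) and length == 3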
Example #3
def main():
    parser = argparse.ArgumentParser(
        description='Train FCN on the Trancos or WebcamT datasets.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m',
                        '--model_path',
                        default='./fcn.pth',
                        type=str,
                        metavar='',
                        help='model file (output of train)')
    parser.add_argument('-d',
                        '--dataset',
                        default='TRANCOS',
                        type=str,
                        metavar='',
                        help='dataset')
    parser.add_argument('-p',
                        '--data_path',
                        default='/ctm-hdd-pool01/DB/TRANCOS_v3',
                        type=str,
                        metavar='',
                        help='data directory path')
    parser.add_argument('--valid',
                        default=0.2,
                        type=float,
                        metavar='',
                        help='fraction of the training data for validation')
    parser.add_argument('--lr',
                        default=1e-3,
                        type=float,
                        metavar='',
                        help='learning rate')
    parser.add_argument('--epochs',
                        default=500,
                        type=int,
                        metavar='',
                        help='number of training epochs')
    parser.add_argument('--batch_size',
                        default=32,
                        type=int,
                        metavar='',
                        help='batch size')
    parser.add_argument('--img_shape',
                        nargs=2,
                        default=[120, 160],
                        type=int,
                        metavar='',
                        help='shape of the input images')
    parser.add_argument('--lambda',
                        default=1e-3,
                        type=float,
                        metavar='',
                        help='trade-off between density estimation and vehicle count losses '
                             '(see eq. 7 in the paper)')
    parser.add_argument('--gamma',
                        default=1e3,
                        type=float,
                        metavar='',
                        help='precision parameter of the Gaussian kernel (inverse of variance)')
    parser.add_argument('--weight_decay',
                        default=0.,
                        type=float,
                        metavar='',
                        help='weight decay regularization')
    parser.add_argument('--use_cuda',
                        default=True,
                        type=int,
                        metavar='',
                        help='use CUDA capable GPU')
    parser.add_argument('--use_visdom',
                        default=False,
                        type=int,
                        metavar='',
                        help='use Visdom to visualize plots')
    parser.add_argument('--visdom_env',
                        default='FCN_train',
                        type=str,
                        metavar='',
                        help='Visdom environment name')
    parser.add_argument('--visdom_port',
                        default=8888,
                        type=int,
                        metavar='',
                        help='Visdom port')
    parser.add_argument('--n2show',
                        default=8,
                        type=int,
                        metavar='',
                        help='number of examples to show in Visdom in each epoch')
    parser.add_argument('--vis_shape',
                        nargs=2,
                        default=[120, 160],
                        type=int,
                        metavar='',
                        help='shape of the images shown in Visdom')
    parser.add_argument('--seed',
                        default=42,
                        type=int,
                        metavar='',
                        help='random seed')
    args = vars(parser.parse_args())

    # dump args to a txt file for your records
    with open(args['model_path'] + '.txt', 'w') as f:
        f.write(str(args) + '\n')

    # use a fixed random seed for reproducibility purposes
    if args['seed'] > 0:
        random.seed(args['seed'])
        np.random.seed(seed=args['seed'])
        torch.manual_seed(args['seed'])

    # if args['use_cuda'] == True and we have a GPU, use the GPU; otherwise, use the CPU
    device = 'cuda:0' if (args['use_cuda']
                          and torch.cuda.is_available()) else 'cpu'
    print('device:', device)

    # define image transformations to be applied to each image in the dataset
    train_transf = T.Compose([
        # data augmentation: horizontal flipping (we could add more transformations)
        NP_T.RandomHorizontalFlip(0.5),
        # convert np.array to tensor
        NP_T.ToTensor(),
    ])
    valid_transf = NP_T.ToTensor()  # no data augmentation in validation

    # instantiate the dataset
    if args['dataset'].upper() == 'TRANCOS':
        train_data = Trancos(train=True,
                             path=args['data_path'],
                             out_shape=args['img_shape'],
                             transform=train_transf,
                             gamma=args['gamma'])
        valid_data = Trancos(train=True,
                             path=args['data_path'],
                             out_shape=args['img_shape'],
                             transform=valid_transf,
                             gamma=args['gamma'])
    else:
        train_data = WebcamT(path=args['data_path'],
                             out_shape=args['img_shape'],
                             transform=train_transf,
                             gamma=args['gamma'])
        valid_data = WebcamT(path=args['data_path'],
                             out_shape=args['img_shape'],
                             transform=valid_transf,
                             gamma=args['gamma'])

    # split the data into training and validation sets
    if args['valid'] > 0:
        # randomly choose some images for validation
        valid_indices = set(random.sample(range(len(train_data)),
                                          int(len(train_data) * args['valid'])))
        valid_data = Subset(valid_data, list(valid_indices))
        # the remaining images are for training
        train_indices = set(range(len(train_data))) - valid_indices
        train_data = Subset(train_data, list(train_indices))
    else:
        valid_data = None

    # create data loaders for training and validation
    # (shuffle the training data at the beginning of each epoch; no need to shuffle in validation)
    train_loader = DataLoader(train_data, batch_size=args['batch_size'], shuffle=True)
    if valid_data:
        valid_loader = DataLoader(valid_data, batch_size=args['batch_size'], shuffle=False)
    else:
        valid_loader = None

    # instantiate the model and define an optimizer
    model = FCN_rLSTM(temporal=False).to(device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    # Visdom is a tool to visualize plots during training
    if args['use_visdom']:
        loss_plt = plotter.VisdomLossPlotter(env_name=args['visdom_env'],
                                             port=args['visdom_port'])
        img_plt = plotter.VisdomImgsPlotter(env_name=args['visdom_env'],
                                            port=args['visdom_port'])

    # training routine
    for epoch in range(args['epochs']):
        print('Epoch {}/{}'.format(epoch, args['epochs'] - 1))

        # training phase: set the model to training mode
        # (affects batchnorm and dropout, if present)
        model.train()
        loss_hist = []
        density_loss_hist = []
        count_loss_hist = []
        count_err_hist = []
        X, mask, density, count = None, None, None, None
        t0 = time.time()
        for i, (X, mask, density, count) in enumerate(train_loader):
            # copy the tensors to the GPU (if applicable)
            X, mask = X.to(device), mask.to(device)
            density, count = density.to(device), count.to(device)

            # forward pass through the model
            density_pred, count_pred = model(X, mask=mask)

            # compute the loss
            N = X.shape[0]
            density_loss = torch.sum((density_pred - density)**2) / (2 * N)
            count_loss = torch.sum((count_pred - count)**2) / (2 * N)
            loss = density_loss + args['lambda'] * count_loss

            # backward pass and optimization step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print(
                '{}/{} mini-batch loss: {:.3f} | density loss: {:.3f} | count loss: {:.3f}'
                .format(i,
                        len(train_loader) - 1, loss.item(),
                        density_loss.item(), count_loss.item()),
                flush=True,
                end='\r')

            # save the loss values
            loss_hist.append(loss.item())
            density_loss_hist.append(density_loss.item())
            count_loss_hist.append(count_loss.item())
            # count error is an evaluation metric, so no need to compute gradients
            with torch.no_grad():
                count_err = torch.sum(torch.abs(count_pred - count)) / N
            count_err_hist.append(count_err.item())
        t1 = time.time()
        print()

        # print the average training losses
        train_loss = sum(loss_hist) / len(loss_hist)
        train_density_loss = sum(density_loss_hist) / len(density_loss_hist)
        train_count_loss = sum(count_loss_hist) / len(count_loss_hist)
        train_count_err = sum(count_err_hist) / len(count_err_hist)
        print('Training statistics:')
        print(
            'global loss: {:.3f} | density loss: {:.3f} | count loss: {:.3f} | count error: {:.3f}'
            .format(train_loss, train_density_loss, train_count_loss,
                    train_count_err))
        print('time: {:.0f} seconds'.format(t1 - t0))

        if args['use_visdom']:
            # plot the losses
            loss_plt.plot('global loss', 'train', 'MSE', epoch, train_loss)
            loss_plt.plot('density loss', 'train', 'MSE', epoch,
                          train_density_loss)
            loss_plt.plot('count loss', 'train', 'MSE', epoch,
                          train_count_loss)
            loss_plt.plot('count error', 'train', 'MAE', epoch,
                          train_count_err)

            # show a few training examples (images + density maps)
            X *= mask  # show the active region only
            X, density, count = X.cpu().numpy(), density.cpu().numpy(), count.cpu().numpy()
            density_pred = density_pred.detach().cpu().numpy()
            count_pred = count_pred.detach().cpu().numpy()
            n2show = min(args['n2show'], X.shape[0])  # show at most args['n2show'] images
            show_images(img_plt,
                        'train gt',
                        X[0:n2show],
                        density[0:n2show],
                        count[0:n2show],
                        shape=args['vis_shape'])
            show_images(img_plt,
                        'train pred',
                        X[0:n2show],
                        density_pred[0:n2show],
                        count_pred[0:n2show],
                        shape=args['vis_shape'])

        if valid_loader is None:
            print()
            continue

        # validation phase: set the model to evaluation mode
        # (affects batchnorm and dropout, if present)
        model.eval()
        loss_hist = []
        density_loss_hist = []
        count_loss_hist = []
        count_err_hist = []
        X, mask, density, count = None, None, None, None
        t0 = time.time()
        for i, (X, mask, density, count) in enumerate(valid_loader):
            # copy the tensors to the GPU (if available)
            X, mask = X.to(device), mask.to(device)
            density, count = density.to(device), count.to(device)

            # forward pass through the model
            # (no need to compute gradients in validation: faster and uses less memory)
            with torch.no_grad():
                density_pred, count_pred = model(X, mask=mask)

            # compute the loss
            N = X.shape[0]
            density_loss = torch.sum((density_pred - density)**2) / (2 * N)
            count_loss = torch.sum((count_pred - count)**2) / (2 * N)
            loss = density_loss + args['lambda'] * count_loss

            # save the loss values
            loss_hist.append(loss.item())
            density_loss_hist.append(density_loss.item())
            count_loss_hist.append(count_loss.item())
            count_err = torch.sum(torch.abs(count_pred - count)) / N
            count_err_hist.append(count_err.item())
        t1 = time.time()

        # print the average validation losses
        valid_loss = sum(loss_hist) / len(loss_hist)
        valid_density_loss = sum(density_loss_hist) / len(density_loss_hist)
        valid_count_loss = sum(count_loss_hist) / len(count_loss_hist)
        valid_count_err = sum(count_err_hist) / len(count_err_hist)
        print('Validation statistics:')
        print(
            'global loss: {:.3f} | density loss: {:.3f} | count loss: {:.3f} | count error: {:.3f}'
            .format(valid_loss, valid_density_loss, valid_count_loss,
                    valid_count_err))
        print('time: {:.0f} seconds'.format(t1 - t0))
        print()

        if args['use_visdom']:
            # plot the losses
            loss_plt.plot('global loss', 'valid', 'MSE', epoch, valid_loss)
            loss_plt.plot('density loss', 'valid', 'MSE', epoch,
                          valid_density_loss)
            loss_plt.plot('count loss', 'valid', 'MSE', epoch,
                          valid_count_loss)
            loss_plt.plot('count error', 'valid', 'MAE', epoch,
                          valid_count_err)

            # show a few validation examples (images + density maps)
            X *= mask  # show the active region only
            X, density, count = X.cpu().numpy(), density.cpu().numpy(), count.cpu().numpy()
            density_pred = density_pred.cpu().numpy()
            count_pred = count_pred.cpu().numpy()
            n2show = min(args['n2show'], X.shape[0])  # show at most args['n2show'] images
            show_images(img_plt,
                        'valid gt',
                        X[0:n2show],
                        density[0:n2show],
                        count[0:n2show],
                        shape=args['vis_shape'])
            show_images(img_plt,
                        'valid pred',
                        X[0:n2show],
                        density_pred[0:n2show],
                        count_pred[0:n2show],
                        shape=args['vis_shape'])

    torch.save(model.state_dict(), args['model_path'])
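
In both training scripts, `gamma` is the precision (inverse variance) of the Gaussian kernel that turns point annotations into ground-truth density maps, and the vehicle count is the integral of the map. The datasets build these maps internally; the following is only a minimal sketch of that construction under the stated assumptions (the `density_map` helper is hypothetical):

import numpy as np

def density_map(points, shape, gamma):
    """Sum of isotropic Gaussians with precision gamma, one per annotated
    point; each Gaussian is normalized to unit mass, so the map sums to the
    vehicle count."""
    H, W = shape
    ys, xs = np.mgrid[0:H, 0:W]
    D = np.zeros(shape)
    for px, py in points:
        g = np.exp(-0.5 * gamma * ((xs - px)**2 + (ys - py)**2))
        D += g / g.sum()  # normalize so each point contributes mass 1
    return D

# two vehicles on a 120x176 grid, with the train script's default gamma
D = density_map([(30, 40), (100, 60)], (120, 176), gamma=1e3)
assert abs(D.sum() - 2.0) < 1e-6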
Example #4
def main():
    parser = argparse.ArgumentParser(
        description='Test FCN on the Trancos dataset.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m',
                        '--model_path',
                        default='./fcn.pth',
                        type=str,
                        metavar='',
                        help='model file (output of train)')
    parser.add_argument('-d',
                        '--data_path',
                        default='/ctm-hdd-pool01/DB/TRANCOS_v3',
                        type=str,
                        metavar='',
                        help='data directory path')
    parser.add_argument('--batch_size',
                        default=32,
                        type=int,
                        metavar='',
                        help='batch size')
    parser.add_argument('--size_red',
                        default=4,
                        type=int,
                        metavar='',
                        help='size reduction factor to be applied to the images')
    parser.add_argument('--gamma',
                        default=1e3,
                        type=float,
                        metavar='',
                        help='precision parameter of the Gaussian kernel (inverse of variance)')
    parser.add_argument('--use_cuda',
                        default=True,
                        type=int,
                        metavar='',
                        help='use CUDA capable GPU')
    parser.add_argument('--use_visdom',
                        default=False,
                        type=int,
                        metavar='',
                        help='use Visdom to visualize plots')
    parser.add_argument('--visdom_env',
                        default='FCN_test',
                        type=str,
                        metavar='',
                        help='Visdom environment name')
    parser.add_argument('--visdom_port',
                        default=8888,
                        type=int,
                        metavar='',
                        help='Visdom port')
    parser.add_argument('--n2show',
                        default=16,
                        type=int,
                        metavar='',
                        help='number of examples to show in Visdom')
    parser.add_argument('--vis_shape',
                        nargs=2,
                        default=[120, 160],
                        type=int,
                        metavar='',
                        help='shape of the images shown in Visdom')
    parser.add_argument('--seed',
                        default=-1,
                        type=int,
                        metavar='',
                        help='random seed')
    args = vars(parser.parse_args())

    # use a fixed random seed for reproducibility purposes
    if args['seed'] > 0:
        random.seed(args['seed'])
        np.random.seed(seed=args['seed'])
        torch.manual_seed(args['seed'])

    # if args['use_cuda'] == True and we have a GPU, use the GPU; otherwise, use the CPU
    device = 'cuda:0' if (args['use_cuda']
                          and torch.cuda.is_available()) else 'cpu'
    print('device:', device)

    # instantiate the dataset
    test_data = Trancos(train=False,
                        path=args['data_path'],
                        size_red=args['size_red'],
                        transform=NP_T.ToTensor(),
                        gamma=args['gamma'])

    # create a data loader
    test_loader = DataLoader(test_data,
                             batch_size=args['batch_size'],
                             shuffle=True)

    # instantiate the model
    model = FCN_rLSTM(temporal=False).to(device)
    model.load_state_dict(torch.load(args['model_path'], map_location=device))
    print(model)

    # Visdom is a tool to visualize plots and images
    if args['use_visdom']:
        img_plt = plotter.VisdomImgsPlotter(env_name=args['visdom_env'],
                                            port=args['visdom_port'])
        samples = {
            'X': [],
            'density': [],
            'count': [],
            'density_pred': [],
            'count_pred': []
        }
        nsaved = 0

    # do inference and print statistics
    model.eval()  # set model to evaluation mode
    density_loss = 0.
    count_loss = 0.
    count_err = 0.
    t0 = time.time()
    for i, (X, mask, density, count) in enumerate(test_loader):
        # copy the tensors to the GPU (if available)
        X, mask = X.to(device), mask.to(device)
        density, count = density.to(device), count.to(device)

        # forward pass through the model
        # (no need to compute gradients at test time: faster and uses less memory)
        with torch.no_grad():
            density_pred, count_pred = model(X, mask=mask)

        # compute the performance metrics
        density_loss += torch.sum((density_pred - density)**2) / 2
        count_loss += torch.sum((count_pred - count)**2) / 2
        count_err += torch.sum(torch.abs(count_pred - count))

        # save a few examples to show in Visdom
        if args['use_visdom'] and (nsaved < args['n2show']):
            n2save = min(X.shape[0], args['n2show'] - nsaved)
            samples['X'].append((X[0:n2save] * mask[0:n2save]).cpu().numpy())
            samples['density'].append(density[0:n2save].cpu().numpy())
            samples['count'].append(count[0:n2save].cpu().numpy())
            samples['density_pred'].append(
                density_pred[0:n2save].cpu().numpy())
            samples['count_pred'].append(count_pred[0:n2save].cpu().numpy())
            nsaved += n2save

        print('Testing... ({:.0f}% done)'.format(100. * (i + 1) /
                                                 len(test_loader)),
              flush=True,
              end='\r')
    print()
    density_loss /= len(test_data)
    count_loss /= len(test_data)
    count_err /= len(test_data)
    t1 = time.time()

    print('Test statistics:')
    print('density loss: {:.3f} | count loss: {:.3f} | count error: {:.3f}'.format(
        density_loss, count_loss, count_err))
    print('time: {:.0f} seconds'.format(t1 - t0))

    # show a few examples
    if args['use_visdom']:
        for key in samples:
            samples[key] = np.concatenate(samples[key], axis=0)

        show_images(img_plt,
                    'test gt',
                    samples['X'],
                    samples['density'],
                    samples['count'],
                    shape=args['vis_shape'])
        show_images(img_plt,
                    'test pred',
                    samples['X'],
                    samples['density_pred'],
                    samples['count_pred'],
                    shape=args['vis_shape'])
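
Once trained and saved with `torch.save(model.state_dict(), ...)`, the model can also be reloaded for quick single-image inference outside the test loop. A minimal sketch, assuming the same imports as the scripts above and the same `model(X, mask=mask)` interface returning a density map and a count (the tensor shapes and the all-ones mask are illustrative, not a documented convention):

import torch

model = FCN_rLSTM(temporal=False)
model.load_state_dict(torch.load('./fcn.pth', map_location='cpu'))
model.eval()  # inference mode (affects batchnorm and dropout, if present)

X = torch.randn(1, 3, 120, 176)    # illustrative input: one 3-channel image
mask = torch.ones(1, 1, 120, 176)  # assumed convention: 1 = active pixel
with torch.no_grad():
    density_pred, count_pred = model(X, mask=mask)
print('predicted count: {:.1f}'.format(count_pred.item()))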