Example #1
    def __init__(self,
                 num_class,
                 num_segments,
                 modality,
                 base_model='resnet101',
                 new_length=None,
                 consensus_type='avg',
                 before_softmax=True,
                 dropout=0.8,
                 crop_num=1,
                 partial_bn=True):
        super(TSN, self).__init__()
        self.modality = modality
        self.num_segments = num_segments
        self.reshape = True
        self.before_softmax = before_softmax
        self.dropout = dropout
        self.crop_num = crop_num
        self.consensus_type = consensus_type
        if not before_softmax and consensus_type != 'avg':
            raise ValueError("Only avg consensus can be used after Softmax")

        if new_length is None:
            self.new_length = 1 if modality == "RGB" else 5
        else:
            self.new_length = new_length

        log(("""
Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
        """.format(base_model, self.modality, self.num_segments,
                   self.new_length, consensus_type, self.dropout)))

        self._prepare_base_model(base_model)

        feature_dim = self._prepare_tsn(num_class)

        if self.modality == 'Flow':
            log("Converting the ImageNet model to a flow init model")
            self.base_model = self._construct_flow_model(self.base_model)
            log("Done. Flow model ready...")
        elif self.modality == 'RGBDiff':
            log("Converting the ImageNet model to RGB+Diff init model")
            self.base_model = self._construct_diff_model(self.base_model)
            log("Done. RGBDiff model ready.")

        self.consensus = ConsensusModule(consensus_type)

        if not self.before_softmax:
            self.softmax = nn.Softmax()

        self._enable_pbn = partial_bn
        if partial_bn:
            self.partialBN(True)
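For orientation, a minimal instantiation sketch consistent with how this constructor is called in Examples #9 and #12; the class count, segment count, and modality below are illustrative, not taken from the code above.

# Hypothetical instantiation; 101 classes / 3 segments mirror the UCF101
# settings used later in these examples.
net = TSN(101, 3, 'RGB',
          base_model='resnet101',
          consensus_type='avg',
          dropout=0.8,
          partial_bn=True)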
Example #2
def validate(val_loader, model, criterion, iter, logger=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda(async=True)
        input_var = torch.autograd.Variable(input, volatile=True)
        target_var = torch.autograd.Variable(target, volatile=True)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        losses.update(loss.data[0], input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.log_freq == 0:
            log(('Test: [{0}/{1}]\t'
                 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                     i,
                     len(val_loader),
                     batch_time=batch_time,
                     loss=losses,
                     top1=top1,
                     top5=top5)))

    log((
        'Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
        .format(top1=top1, top5=top5, loss=losses)))

    return top1.avg
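The loop above uses pre-0.4 PyTorch idioms (Variable(..., volatile=True), cuda(async=True), loss.data[0]). A rough sketch of the same evaluation step with the post-0.4 API, assuming the same AverageMeter and accuracy helpers:

with torch.no_grad():                                  # replaces volatile=True
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda(non_blocking=True)        # 'async' was renamed
        output = model(input.cuda())
        loss = criterion(output, target)
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))      # .item() replaces .data[0]
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))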
Example #3
    def train(self, mode=True):
        """
        Override the default train() to freeze the BN parameters (partial BN is implemented by overriding train()).
        :return:
        """
        super(TSN, self).train(mode)
        count = 0
        if self._enable_pbn:
            log("Freezing BatchNorm2D except the first one.")
            for m in self.base_model.modules():
                if isinstance(m, nn.BatchNorm2d):
                    count += 1
                    if count >= (2 if self._enable_pbn else 1):
                        m.eval()

                        # shutdown update in frozen mode
                        m.weight.requires_grad = False
                        m.bias.requires_grad = False
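A small sanity check of the behaviour above, assuming a TSN built as in Example #1 with partial_bn=True; it only inspects which BatchNorm2d modules end up in eval mode after train() runs:

# Hypothetical check: only the first BN layer should stay in training mode.
net = TSN(101, 3, 'RGB', base_model='resnet101', partial_bn=True)
net.train()
bn_layers = [m for m in net.base_model.modules() if isinstance(m, nn.BatchNorm2d)]
print([m.training for m in bn_layers[:3]])   # expected: [True, False, False]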
Example #4
    def _load_image(self, directory, record, idx):
        if self.modality == 'RGB' or self.modality == 'RGBDiff':
            directory += record.path
            return [
                Image.open(os.path.join(
                    directory, self.image_tmpl.format(idx))).convert('RGB')
            ]
        elif self.modality == 'Flow':
            log('directory:', directory)
            log('idx:', idx)
            # Each call returns two optical-flow images: one for the x direction and one for the y direction.
            x_img = Image.open(
                os.path.join(directory + 'u/' + record.path,
                             self.image_tmpl.format(idx))).convert('L')
            y_img = Image.open(
                os.path.join(directory + 'v/' + record.path,
                             self.image_tmpl.format(idx))).convert('L')

            return [x_img, y_img]
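Because the Flow branch builds its directories by string concatenation ('u/' and 'v/' before record.path), the root directory must end with a trailing slash. An illustrative path construction, using the flow root and frame template assumed in Example #12 and a made-up record.path:

import os

root = '../tvl1_flow/'            # flow root_path used in Example #12
rec_path = 'v_Archery_g01_c01'    # hypothetical record.path
tmpl = 'frame{:06}.jpg'
idx = 7

x_path = os.path.join(root + 'u/' + rec_path, tmpl.format(idx))
y_path = os.path.join(root + 'v/' + rec_path, tmpl.format(idx))
print(x_path)   # ../tvl1_flow/u/v_Archery_g01_c01/frame000007.jpg
print(y_path)   # ../tvl1_flow/v/v_Archery_g01_c01/frame000007.jpg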
Example #5
def blank_loglikes(n):
    a = ones((NUM_CHARS, n)) * 0.1
    a[0, :] = 0.9
    a /= sqrt(square(a).sum(axis=0))
    return log(a)
Example #6
def uniform_loglikes(n):
    return log(ones((NUM_CHARS, n)) / float(NUM_CHARS))
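Both helpers return an array of shape (NUM_CHARS, n) of log scores. A quick numpy check of what they compute, with an illustrative NUM_CHARS (the real constant comes from the project); note that blank_loglikes normalises each column to unit L2 norm, so its exponentiated output is not a probability distribution:

import numpy as np

NUM_CHARS = 29   # hypothetical alphabet size
n = 4

u = np.log(np.ones((NUM_CHARS, n)) / NUM_CHARS)     # what uniform_loglikes(n) computes
assert np.allclose(np.exp(u).sum(axis=0), 1.0)      # each column is a proper distribution

b = np.ones((NUM_CHARS, n)) * 0.1
b[0, :] = 0.9                                       # blank symbol assumed at index 0
b /= np.sqrt(np.square(b).sum(axis=0))              # what blank_loglikes(n) computes
assert np.allclose(np.square(b).sum(axis=0), 1.0)   # unit L2 norm; columns do not sum to 1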
Example #7
File: rnn.py Project: comadan/nn
    def cost_and_grad(self, data, labels, back=True, prev_h0=None):
        hps = self.hps
        T = data.shape[1]
        bsize = data.shape[2]

        # FIXME gnumpy reallocates if try and use same parameters?
        #us = self.us[:, 0:T, 0:bsize]
        #dus = self.dus[:, 0:T, 0:bsize]
        #hs = self.hs[:, 0:T, 0:bsize]
        #dhs = self.dhs[:, 0:T, 0:bsize]
        #probs = self.probs[:, 0:T, 0:bsize]
        #dprobs = self.dprobs[:, 0:T, 0:bsize]
        #costs = self.costs[0:T, 0:bsize]

        us = list()
        dus = list()
        hs = list()
        dhs = list()
        h0 = list()
        for k in xrange(hps.hidden_layers):
            us.append(list())
            dus.append(list())
            hs.append(list())
            dhs.append(list())
            h0.append(empty((hps.hidden_size, bsize)))
            for t in xrange(T):
                us[k].append(zeros((hps.hidden_size, bsize)))
                dus[k].append(zeros((hps.hidden_size, bsize)))
                hs[k].append(zeros((hps.hidden_size, bsize)))
                dhs[k].append(zeros((hps.hidden_size, bsize)))
        probs = list()
        for t in xrange(T):
            probs.append(zeros((hps.output_size, bsize)))
        costs = np.zeros((T, bsize))
        if prev_h0 is not None:
            h0 = prev_h0
        else:
            for k in xrange(hps.hidden_layers):
                h0[k] = tile(self.params['h0'][:, k].reshape(-1, 1), bsize)
        bih = self.params['bih']
        Wih = self.params['Wih']
        Whh = self.params['Whh']
        bhh = self.params['bhh']
        Who = self.params['Who']
        bho = self.params['bho']

        # Forward prop

        for t in xrange(T):
            for k in xrange(hps.hidden_layers):
                if t == 0:
                    hprev = h0[k]
                else:
                    hprev = hs[k][t-1]

                if k == 0:
                    us[k][t] = mult(Wih, data[:, t, :]) + bih
                else:
                    us[k][t] = mult(self.params['Wh%d' % k], hs[k-1][t])

                if k == hps.recurrent_layer - 1:
                    us[k][t] += mult(Whh, hprev) + bhh
                    # Clip maximum activation
                    mask = us[k][t] < hps.max_act
                    us[k][t] = us[k][t] * mask + hps.max_act * (1 - mask)
                elif k != 0:
                    us[k][t] += self.params['bh%d' % k]

                hs[k][t] = self.nl(us[k][t])

            probs[t] = softmax(mult(Who, hs[-1][t]) + bho)

        self.last_h = list()
        for k in xrange(hps.hidden_layers):
            self.last_h.append(hs[k][-1])

        if labels is None:
            return None, probs

        probs_neg_log = list()
        dprobs = list()
        for t in xrange(T):
            probs_neg_log.append(as_np(-1 * log(probs[t])))
            dprobs.append(as_np(probs[t].copy()))
        for k in xrange(bsize):
            for t in xrange(len(labels[k])):
                costs[t, k] = probs_neg_log[t][labels[k][t], k]
                dprobs[t][labels[k][t], k] -= 1
        for t in xrange(T):
            dprobs[t] = array(dprobs[t])

        # NOTE Summing costs over time
        # NOTE FIXME Dividing by T to get better sense if objective
        # is decreasing, remove for grad checking
        cost = costs.sum() / bsize / float(T)
        if not back:
            return cost, probs

        # Backprop

        for k in self.grads:
            self.grads[k][:] = 0

        for t in reversed(xrange(T)):
            self.grads['bho'] += dprobs[t][:, :].sum(axis=-1).reshape((-1, 1)) / bsize
            self.grads['Who'] += mult(dprobs[t], hs[-1][t].T) / bsize

            for k in reversed(xrange(hps.hidden_layers)):
                if k == hps.hidden_layers - 1:
                    dhs[k][t] += mult(Who.T, dprobs[t])
                else:
                    dhs[k][t] += mult(self.params['Wh%d' % (k+1)].T, dhs[k+1][t])
                dus[k][t] += get_nl_grad(self.hps.nl, us[k][t]) * dhs[k][t]

                if k > 0:
                    self.grads['Wh%d' % k] += mult(dus[k][t], hs[k-1][t].T) / bsize
                    self.grads['bh%d' % k] += dus[k][t].sum(axis=-1).reshape((-1, 1)) / bsize

                if k == hps.recurrent_layer - 1:
                    if t == 0:
                        hprev = h0[k]
                        self.grads['h0'][:, k] = mult(Whh.T, dus[k][t]).sum(axis=-1) / bsize
                    else:
                        hprev = hs[k][t-1]
                        dhs[k][t-1] = mult(Whh.T, dus[k][t])
                    self.grads['Whh'] += mult(dus[k][t], hprev.T) / bsize
                    self.grads['bhh'] += dus[k][t].sum(axis=-1).reshape((-1, 1)) / bsize

            self.grads['Wih'] += mult(dus[0][t], data[:, t, :].T) / bsize
            self.grads['bih'] += dus[0][t].sum(axis=-1).reshape((-1, 1)) / bsize

        return cost, self.grads
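The block that fills dprobs relies on the usual softmax-plus-cross-entropy identity: for a one-hot target y, the gradient of -log probs[target] with respect to the logits is probs - y, which is why dprobs starts as a copy of probs and only the target entries are decremented. A tiny numpy illustration of that identity (the sizes are made up):

import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=0, keepdims=True))
    return e / e.sum(axis=0, keepdims=True)

z = np.random.randn(5, 3)          # (output_size, bsize), illustrative
labels = np.array([2, 0, 4])       # one target class per batch column

probs = softmax(z)
dprobs = probs.copy()
dprobs[labels, np.arange(3)] -= 1  # same update as the inner loop above

one_hot = np.zeros_like(probs)
one_hot[labels, np.arange(3)] = 1
assert np.allclose(dprobs, probs - one_hot)   # cross-entropy gradient w.r.t. logits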
Example #9
if args.dataset == 'ucf101':
    num_class = 101
elif args.dataset == 'hmdb51':
    num_class = 51
elif args.dataset == 'kinetics':
    num_class = 400
else:
    raise ValueError('Unknown dataset ' + args.dataset)

net = TSN(num_class,
          1,
          args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          dropout=args.dropout)

checkpoint = torch.load(args.weights)
log("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                            checkpoint['best_prec1']))

base_dict = {
    '.'.join(k.split('.')[1:]): v
    for k, v in list(checkpoint['state_dict'].items())
}
net.load_state_dict(base_dict)

if args.test_crops == 1:
    cropping = torchvision.transforms.Compose([
        GroupScale(net.scale_size),
        GroupCenterCrop(net.input_size),
    ])
elif args.test_crops == 10:
    cropping = torchvision.transforms.Compose(
        [GroupOverSample(net.input_size, net.scale_size)])
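The base_dict comprehension strips the leading 'module.' prefix that nn.DataParallel prepends to every parameter name when the checkpoint was saved from a wrapped model, so the weights can be loaded into the bare TSN. A quick illustration with hypothetical keys:

state = {'module.base_model.conv1.weight': 0, 'module.new_fc.bias': 1}
base_dict = {'.'.join(k.split('.')[1:]): v for k, v in state.items()}
print(sorted(base_dict))   # ['base_model.conv1.weight', 'new_fc.bias']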
Example #12
def main():
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)  # only returns the network structure

    # state_dict = model_zoo.load_url(model_urls[args.arch],model_dir='./')
    # pretrained_dict = {}
    # for key,value in state_dict.items():
    #     pretrained_dict['base_model.'+key] = value
    #
    # model_dict = model.state_dict()
    #
    # #filter weight
    # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
    # # log(pretrained_dict.keys())
    #
    # model_dict.update(pretrained_dict)
    #
    # model.load_state_dict(model_dict)  # load the pretrained weights into the model

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model,
                                  device_ids=args.gpus).cuda()  # single-machine, multi-GPU data parallelism

    if args.resume:
        if os.path.isfile(args.resume):
            log(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            log(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            log(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1  # number of frames sampled per segment
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            root_path="./jpegs_256/"
            if args.modality == 'RGB' else '../tvl1_flow/',
            list_file=args.train_list,
            num_segments=args.num_segments,  # defaults to 3
            new_length=data_length,
            modality=args.modality,
            # image_tmpl="frame{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
            image_tmpl="frame{:06}.jpg",
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            root_path="./jpegs_256/"
            if args.modality == 'RGB' else '../tvl1_flow/',
            list_file=args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            # image_tmpl="frame{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
            image_tmpl="frame{:06}.jpg",
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        log(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # break

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
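adjust_learning_rate is not included in these examples. In the reference TSN training script it is a step schedule that decays the base learning rate at the epochs given in --lr_steps and applies the per-group lr_mult / decay_mult set up by get_optim_policies(); a sketch along those lines (the 0.1 decay factor is an assumption, and args is the module-level namespace used above):

import numpy as np

def adjust_learning_rate(optimizer, epoch, lr_steps):
    # Decay the base lr by 0.1 for every milestone in lr_steps already passed,
    # then scale each parameter group by its own lr_mult / decay_mult.
    decay = 0.1 ** sum(epoch >= np.array(lr_steps))
    lr = args.lr * decay
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr * param_group['lr_mult']
        param_group['weight_decay'] = args.weight_decay * param_group['decay_mult']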
Example #13
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)

    # switch to train mode
    model.train()  # switch to train mode; TSN overrides train() to control which layers are actually trained

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        # RGB modality:
        #   each sample is 3 RGB frames; input.size() ==> [16, 9, 224, 224] (see model.py for details)
        #   target.size() ==> [16]
        # Flow modality:
        #   each sample is 3 stacked optical-flow stacks; input.size() ==> [16, 30, 224, 224]
        #   target.size() ==> [16]; with batch_size=16 a batch holds 16 video clips, hence 16 targets
        # log('input:', input.size())
        # log('target:', target)
        data_time.update(time.time() - end)

        target = target.cuda(async=True)
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        # break
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data[0], input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()

        loss.backward()

        if args.clip_gradient is not None:
            total_norm = clip_grad_norm(model.parameters(), args.clip_gradient)
            if total_norm > args.clip_gradient:
                log("clipping gradient: {} with coef {}".format(
                    total_norm, args.clip_gradient / total_norm))

        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.log_freq == 0:
            log(('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                     epoch,
                     i,
                     len(train_loader),
                     batch_time=batch_time,
                     data_time=data_time,
                     loss=losses,
                     top1=top1,
                     top5=top5,
                     lr=optimizer.param_groups[-1]['lr'])))
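Both train() and validate() call an accuracy(output, target, topk) helper that is not shown in these examples. A minimal sketch of the usual top-k precision helper, consistent with how its return values (prec1[0], prec5[0]) are indexed above:

def accuracy(output, target, topk=(1,)):
    # Returns one precision value (in percent) per requested k, each as a 1-element tensor.
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)        # indices of the top-maxk scores
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res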