コード例 #1
0
class ExtructFeatrue(object):
    """Feature extractor built on a pretrained TSN (dpn107, RGB) checkpoint.

    NOTE(review): 'ExtructFeatrue' / 'loadFeatrue' look like typos of
    'ExtractFeature' / 'loadFeature'; renaming would break existing callers,
    so the names are kept as-is.
    """

    def __init__(self):
        # Load the trained checkpoint; it stores a 'state_dict' that is loaded
        # below only after wrapping the model in DataParallel (presumably the
        # checkpoint keys carry DataParallel's 'module.' prefix -- confirm).
        saved = torch.load(
            '/mnt/workspace/model/activitynet_clip_kinetics600_dpn107_rgb_model/activitynet_clip_600_dpn107_rgb_model_best_074.pth.tar'
        )
        # Positional args assumed to be (num_class=201, num_segments=3,
        # modality='RGB', base_model='dpn107', new_length=1) -- confirm
        # against the TSN definition.
        self.model = TSN(201, 3, 'RGB', 'dpn107', 1)
        self.train_augmentation = self.model.get_augmentation()
        self.input_mean = self.model.input_mean
        self.input_std = self.model.input_std
        self.softmax = nn.Softmax(dim=-1).cuda()
        self.model = nn.DataParallel(self.model)
        self.model.load_state_dict(saved['state_dict'])

        # Expose the backbone and the final FC separately so intermediate
        # features can be taken before classification.
        self.base_model = nn.DataParallel(self.model.module.base_model).cuda()
        self.new_fc = nn.DataParallel(self.model.module.new_fc).cuda()

        # Inference only: put every wrapper into eval mode.
        self.model.eval()
        self.base_model.eval()
        self.new_fc.eval()

    def loadFeatrue(self, x):
        """Return (backbone features, softmax class scores) for input batch x."""
        midfeature = self.base_model(x)
        classfeature = self.softmax(self.new_fc(midfeature))
        return midfeature, classfeature
コード例 #2
0
ファイル: train.py プロジェクト: Desny/ECO
def train(args):
    """Train the TSN model under PaddlePaddle dygraph mode.

    Reads the 'train' section of the config, optionally resumes from the last
    saved checkpoint, and logs/saves every 100 batches.
    """
    # parse config
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        # Instantiate the user-defined network as train_model.
        train_model = TSN(args.batch_size, 32)
        train_model.train()
        opt = fluid.optimizer.Momentum(0.001,
                                       0.9,
                                       parameter_list=train_model.parameters())

        if args.pretrain:
            # Load the model from the previous run and continue training.
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/tsn_model')
            train_model.load_dict(model)

        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # Reader that yields one batch per call; each sample is (imgs, label).
        train_reader = UCFReader('train', train_config).create_reader()

        epochs = args.epoch
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype(
                    'float32'
                )  # dy_x_data=imgs  shape:(batch_size, seg_num*seglen, 3, target_size, target_size)
                y_data = np.array([x[1] for x in data]).astype(
                    'int64')  # y_data=label  shape:(batch_size,)

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label = fluid.layers.reshape(label, [args.batch_size, 1])
                label.stop_gradient = True  # Keep backprop from flowing into the label tensor.

                out, acc = train_model(img, label)

                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()

                opt.minimize(avg_loss)
                train_model.clear_gradients()

                # Log and checkpoint every 100 batches.
                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
                    fluid.dygraph.save_dygraph(train_model.state_dict(),
                                               args.save_dir + '/tsn_model')
        print("Final loss: {}".format(avg_loss.numpy()))
コード例 #3
0
def rgb_model():
    """Build the RGB-stream TSN and restore it from args.rgb_weights."""
    model = TSN(
        2,
        1,
        'RGB',
        base_model=args.arch,
        consensus_type=args.crop_fusion_type,
        dropout=args.dropout,
    )
    state = torch.load(args.rgb_weights)['state_dict']
    model.load_state_dict(state)
    return model
コード例 #4
0
ファイル: test.py プロジェクト: ljh9999/TSN-simplify-pytorch
def opf_model():
    """Build the optical-flow-stream TSN and restore its trained weights."""
    model = TSN(
        2,
        1,
        'Flow',
        base_model=args.arch,
        consensus_type=args.crop_fusion_type,
        dropout=args.dropout,
    )
    state = torch.load("475_inceptionv4__flow_model_best.pth.tar")['state_dict']
    model.load_state_dict(state)
    return model
コード例 #5
0
def get_executor(use_gpu=True):
    """Build the TSM/TSN MobileNetV2 model, load weights from `pt_path`, and
    return a compiled executor for the selected target.

    `pt_path` is a module-level variable defined elsewhere in this file.
    """
    # torch_module = MobileNetV2(n_class=27)
    # if not os.path.exists("mobilenetv2_jester_online.pth.tar"):  # checkpoint not downloaded
    #     print('Downloading PyTorch checkpoint...')
    #     import urllib.request
    #     url = 'https://file.lzhu.me/projects/tsm/models/mobilenetv2_jester_online.pth.tar'
    #     urllib.request.urlretrieve(url, './mobilenetv2_jester_online.pth.tar')
    # torch_module.load_state_dict(torch.load("mobilenetv2_jester_online.pth.tar"))
    # torch_inputs = (torch.rand(1, 3, 224, 224),
    #                 torch.zeros([1, 3, 56, 56]),
    #                 torch.zeros([1, 4, 28, 28]),
    #                 torch.zeros([1, 4, 28, 28]),
    #                 torch.zeros([1, 8, 14, 14]),
    #                 torch.zeros([1, 8, 14, 14]),
    #                 torch.zeros([1, 8, 14, 14]),
    #                 torch.zeros([1, 12, 14, 14]),
    #                 torch.zeros([1, 12, 14, 14]),
    #                 torch.zeros([1, 20, 7, 7]),
    #                 torch.zeros([1, 20, 7, 7]))

    torch_module = TSN(2, 1, 'RGB',
                       base_model='mobilenetv2',
                       consensus_type='avg',
                       img_feature_dim=256,
                       pretrain='imagenet',
                       # is_shift=False, shift_div=8, shift_place='blockres',
                       is_shift=True, shift_div=8, shift_place='blockres',
                       # non_local='_nl' in './checkpoint/TSM_HockeyFights_RGB_mobilenetv2_shift8_blockres_avg_segment8_e100/ckpt.best.pth.tar',
                       non_local='_nl' in pt_path,
                       )
    checkpoint = torch.load(
        # './checkpoint/TSM_HockeyFights_RGB_mobilenetv2_shift8_blockres_avg_segment8_e100/ckpt.best.pth.tar')
        pt_path)
    checkpoint = checkpoint['state_dict']

    # Drop the leading component of every state-dict key (typically the
    # DataParallel 'module.' prefix -- confirm against how the ckpt was saved).
    # base_dict = {('base_model.' + k).replace('base_model.fc', 'new_fc'): v for k, v in list(checkpoint.items())}
    base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint.items())}
    # Remap the backbone classifier parameters to the TSN head's names.
    replace_dict = {'base_model.classifier.weight': 'new_fc.weight',
                    'base_model.classifier.bias': 'new_fc.bias',
                    }
    for k, v in replace_dict.items():
        if k in base_dict:
            base_dict[v] = base_dict.pop(k)
    torch_module.load_state_dict(base_dict)
    # NOTE(review): plain parentheses do NOT create a one-element tuple, so
    # torch_inputs is a bare tensor here. If torch2executor expects a tuple of
    # inputs this should be (torch.rand(1, 24, 224, 224),) -- confirm.
    torch_inputs = (torch.rand(1, 24, 224, 224))
    # torch_inputs = torch.rand(1, 24, 224, 224)

    if use_gpu:
        target = 'cuda'
    else:
        target = 'llvm -mcpu=cortex-a72 -target=armv7l-linux-gnueabihf'
    return torch2executor(torch_module, torch_inputs, target)
コード例 #6
0
    def __init__(self):
        """Compose a TSN RGB backbone with a two-layer classification head."""
        super(TSN_BIT, self).__init__()
        # TSN backbone; segment count and modality come from module globals.
        backbone = TSN(
            num_class,
            num_segments=num_segments,
            modality=modality,
            base_model=arch,
            consensus_type=crop_fusion_type,
            dropout=0.7,
        )
        self.tsn = backbone
        self.activation = nn.LeakyReLU()
        # Head: Linear(51 -> 32) then Linear(32 -> 21).
        self.fc1 = nn.Linear(51, 32)
        self.fc2 = nn.Linear(32, 21)
        # Checkpoint file consumed by the RGB weight loader.
        self.model_name = '2019-01-20_23-57-32.pth'
        self._load_tsn_rgb_weight()
    def __init__(self):
        """Compose a TSN flow backbone with a two-layer classification head."""
        super(TSN_BIT, self).__init__()
        # TSN backbone; here the segment count is taken from data_length.
        backbone = TSN(
            num_class,
            num_segments=data_length,
            modality=modality,
            base_model=arch,
            consensus_type=crop_fusion_type,
            dropout=0.7,
        )
        self.tsn = backbone
        self.activation = nn.LeakyReLU()
        # Head: Linear(101 -> 32) then Linear(32 -> 8).
        self.fc1 = nn.Linear(101, 32)
        self.fc2 = nn.Linear(32, 8)
        # Checkpoint file consumed by the generic pretrained-model loader.
        self.model_name = 'TSN_Flow_2019-01-23_17-06-15.pth'
        self._load_pretrained_model(self.model_name)
コード例 #8
0
def load_net(RGBweights, Flowweights):
	"""Load pretrained RGB and Flow TSN models into globals RGBnet / Flownet.

	RGBweights / Flowweights are checkpoint paths; each checkpoint stores a
	'state_dict' plus 'epoch' and 'best_prec1' metadata.
	"""
	# weights: model weight

	global RGBnet
	global Flownet
	global num_class

	#******************* load RGB Net **********************
	print('Loading RGB Net......')
	RGBnet = TSN(num_class, 1, 'RGB',
			  base_model='BNInception',
			  consensus_type='avg',
			  dropout=0.7)

	checkpoint = torch.load(RGBweights)
	# Drop the leading component of each key (typically the DataParallel
	# 'module.' prefix -- confirm against how the checkpoint was saved).
	base_dict_RGB = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())}
	RGBnet.load_state_dict(base_dict_RGB)
	print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))

	#******************* load Flow Net **********************
	print('Loading Flow Net......')
	Flownet = TSN(num_class, 1, 'Flow',
			  base_model='BNInception',
			  consensus_type='avg',
			  dropout=0.7)

	checkpoint = torch.load(Flowweights)
	print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))

	base_dict_Flow = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())}
	Flownet.load_state_dict(base_dict_Flow)
コード例 #9
0
    def __init__(self):
        """TSN backbone plus a two-layer head (Linear 101->32, Linear 32->8)."""
        super(TSN_BIT, self).__init__()
        self.tsn = TSN(num_class, num_segments=num_segments, modality=modality,
            base_model=arch,
            consensus_type=crop_fusion_type,
            dropout=0.7)

        self.activation = nn.LeakyReLU()
        # 101 presumably matches the TSN output width -- confirm against TSN.
        self.fc1 = nn.Linear(101, 32)
        self.fc2 = nn.Linear(32, 8)
コード例 #10
0
ファイル: test_models.py プロジェクト: iqDF/tsn-pytorch
def init_model(num_classes, new_length, args):
    """Construct a TSN model for testing.

    Args:
        num_classes: number of output classes.
        new_length: frames per snippet fed to the backbone.
        args: parsed CLI namespace providing num_segments, modality, arch
            and consensus_type.

    Returns:
        The constructed TSN model (no weights loaded here).
    """
    model = TSN(num_classes,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                new_length=new_length,
                consensus_type=args.consensus_type,
                dropout=0.5,
                partial_bn=False)
    # Fix: the old body also computed crop/scale sizes, normalization stats,
    # optimizer policies, augmentation, and a GroupScale/GroupCenterCrop
    # pipeline, none of which were returned or used -- dead code removed.
    return model
コード例 #11
0
ファイル: inference.py プロジェクト: blowing-wind/ECO-paddle
def main():
    """Evaluate a TSN model over the test split and log average accuracy."""
    args = parser.parse_args()
    # Map the dataset name to its class count.
    if args.dataset == 'ucf101':
        args.num_class = 101
    elif args.dataset == 'hmdb51':
        args.num_class = 51
    elif args.dataset == 'kinetics':
        args.num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    place = fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        model = TSN(args.num_class,
                    args.num_segments,
                    args.modality,
                    args.arch,
                    dropout=0)
        # Derive the reader's preprocessing parameters from the model.
        args.short_size = model.scale_size
        args.target_size = model.crop_size
        args.input_mean = model.input_mean
        args.input_std = model.input_std * 3

        state_dict = fluid.dygraph.load_dygraph(args.load_path)[0]
        model.set_dict(state_dict)

        test_reader = KineticsReader('test', args,
                                     args.test_list).create_reader()

        model.eval()
        avg_acc = AverageMeter()

        # Fix: open the log with a context manager so the handle is closed
        # even if a batch raises (the old code leaked the file on error).
        with open(args.log_path, 'w') as log:
            for batch_id, data in enumerate(test_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = model(img, label)

                avg_acc.update(acc.numpy()[0], label.shape[0])
                if (batch_id + 1) % args.print_freq == 0:
                    output = 'Test batch_id:{} | acc {} | avg acc:{}'.format(
                        batch_id + 1,
                        acc.numpy()[0], avg_acc.avg)
                    print(output)
                    log.write(output + '\n')
                    log.flush()
            output = 'Test Avg acc:{}'.format(avg_acc.avg)
            print(output)
            log.write(output + '\n')
            log.flush()
コード例 #12
0
def eval(args):
    """Evaluate a trained TSN on the validation split and print mean accuracy.

    NOTE(review): the name shadows the builtin `eval`; renaming would change
    the script's interface, so it is kept as-is.
    """
    # parse config
    config = parse_config(args.config)
    val_config = merge_configs(config, 'valid', vars(args))
    # print_configs(val_config, "Valid")
    with fluid.dygraph.guard():
        val_model = TSN(args.batch_size, 32)

        # Invert the stored label dictionary to map index -> class name.
        label_dic = np.load('work/UCF-101_jpg/label_dir.npy',
                            allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}

        # get infer reader
        val_reader = UCFReader('valid', val_config).create_reader()

        # if no weight files specified, exit()
        if args.weights:
            weights = args.weights
        else:
            print("model path must be specified")
            exit()

        para_state_dict, _ = fluid.load_dygraph(weights)
        val_model.load_dict(para_state_dict)
        val_model.eval()

        acc_list = []
        for batch_id, data in enumerate(val_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data]).astype('int64')

            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            label = fluid.layers.reshape(label, [args.batch_size, 1])
            label.stop_gradient = True

            out, acc = val_model(img, label)
            print('batch_id=', batch_id, 'acc=', acc.numpy())
            acc_list.append(acc.numpy())

        # Message is Chinese for "validation accuracy"; kept byte-identical.
        print("验证集准确率为:{}".format(np.mean(acc_list)))
コード例 #13
0
def infer(args):
    """Run single-sample inference and print actual vs. predicted labels."""
    # parse config
    config = parse_config(args.config)
    infer_config = merge_configs(config, 'infer', vars(args))
    # print_configs(infer_config, "Infer")
    with fluid.dygraph.guard():
        infer_model = TSN(args.batch_size, 32)

        # Invert the stored label dictionary to map index -> class name.
        label_dic = np.load('work/UCF-101_jpg/label_dir.npy', allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}

        # get infer reader
        infer_reader = UCFReader('infer', infer_config).create_reader()

        # if no weight files specified, exit()
        if args.weights:
            weights = args.weights
        else:
            print("model path must be specified")
            exit()

        para_state_dict, _ = fluid.load_dygraph(weights)
        # print('para_state_dict:', para_state_dict)
        infer_model.load_dict(para_state_dict)
        infer_model.eval()

        for batch_id, data in enumerate(infer_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data])

            img = fluid.dygraph.to_variable(dy_x_data)

            out = infer_model(img)
            # Take the highest-scoring class of the first sample in the batch.
            label_id = fluid.layers.argmax(out, axis=1).numpy()[0]

            # Message is Chinese for "actual label / predicted result"; kept byte-identical.
            print("实际标签{}, 预测结果{}".format(y_data, label_dic[label_id]))
コード例 #14
0
# Dataset name -> number of action classes.
_DATASET_NUM_CLASSES = {
    'ucf101': 101,
    'hmdb51': 51,
    'kinetics': 400,
    'VideoNet': 353,
}

args = parser.parse_args()

if args.dataset not in _DATASET_NUM_CLASSES:
    raise ValueError('Unknown dataset ' + args.dataset)
num_class = _DATASET_NUM_CLASSES[args.dataset]

# Single-segment TSN for test-time evaluation.
net = TSN(num_class,
          1,
          args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          dropout=args.dropout)

checkpoint = torch.load(args.weights)
print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                              checkpoint['best_prec1']))

# Drop the leading component of every state-dict key before loading.
base_dict = {
    '.'.join(key.split('.')[1:]): value
    for key, value in checkpoint['state_dict'].items()
}
net.load_state_dict(base_dict)

if args.test_crops == 1:
    cropping = torchvision.transforms.Compose([
コード例 #15
0
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
         correct_k = correct[:k].view(-1).float().sum(0)
         res.append(correct_k.mul_(100.0 / batch_size))
    return res



categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(args.dataset, args.modality)
num_class = len(categories)

# TRN variants consume args.test_segments segments; all others run single-segment.
net = TSN(num_class, args.test_segments if args.crop_fusion_type in ['TRN','TRNmultiscale'] else 1, args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          img_feature_dim=args.img_feature_dim,
          )

checkpoint = torch.load(args.weights)
print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))

# Drop the leading component of each key (typically the DataParallel
# 'module.' prefix -- confirm against how the checkpoint was saved).
base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())}
net.load_state_dict(base_dict)

if args.test_crops == 1:
    cropping = torchvision.transforms.Compose([
        GroupScale(net.scale_size),
        GroupCenterCrop(net.input_size),
    ])
elif args.test_crops == 10:
コード例 #16
0
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        "emmanuelle", "RGBFlow")
    num_class = len(categories)

    originalNet = TSN(
        27,
        args.test_segments if args.consensus_type in ['MLP'] else 1,
        "RGBFlow",
        base_model=args.arch,
        consensus_type=args.consensus_type,
        img_feature_dim=args.img_feature_dim,
    )

    torch.save(originalNet, "emmanuelle.pth")
    emmanuelleNet = originalNet.base_model

    print(
        "-----------------------------------------------------------------------------------------------------------------"
    )
    # print("Emmanuelle Net:", type(emmanuelleNet))
    print("Children:", len(list(emmanuelleNet.named_children())))
    for name, child in emmanuelleNet.named_children():
        print("    ", name.ljust(30), ":", child)
    for op in emmanuelleNet._op_list:
コード例 #17
0
parser.add_argument('--weight', type=str)

args = parser.parse_args()

# Get dataset categories (one class name per line).
categories_file = 'pretrain/{}_categories.txt'.format(args.dataset)
categories = [line.rstrip() for line in open(categories_file, 'r').readlines()]
num_class = len(categories)

# Backbone selection: InceptionV3 only for 'moments', BNInception otherwise.
args.arch = 'InceptionV3' if args.dataset == 'moments' else 'BNInception'

# Load model.
net = TSN(num_class,
          args.test_segments,
          args.modality,
          base_model=args.arch,
          consensus_type=args.consensus_type,
          img_feature_dim=args.img_feature_dim,
          print_spec=False)

weights = args.weight
checkpoint = torch.load(weights)
#print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))

# Drop the leading component of each state-dict key before loading.
base_dict = {
    '.'.join(k.split('.')[1:]): v
    for k, v in list(checkpoint['state_dict'].items())
}
net.load_state_dict(base_dict)
net.cuda().eval()
コード例 #18
0
def main():
    """Entry point: build a TSN model, set up loaders, then train and validate."""

    torch.set_printoptions(precision=6)

    global args, best_prec1
    args = parser.parse_args()
    # Map the dataset argument to its class count.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'cad':
        num_class = 8
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    """
    #导入模型,输入包含分类的类别数:
    # num_class;args.num_segments表示把一个video分成多少份,对应论文中的K,默认K=3;
    # 采用哪种输入:args.modality,比如RGB表示常规图像,Flow表示optical flow等;
    # 采用哪种模型:args.arch,比如resnet101,BNInception等;
    # 不同输入snippet的融合方式:args.consensus_type,比如avg等;
    # dropout参数:args.dropout。
    """
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Preprocessing parameters and per-layer optimizer policies come from the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    """
    接着main函数的思路,前面这几行都是在TSN类中定义的变量或者方法,model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()是设置多GPU训练模型。
    args.resume这个参数主要是用来设置是否从断点处继续训练,比如原来训练模型训到一半停止了,希望继续从保存的最新epoch开始训练,
    因此args.resume要么是默认的None,要么就是你保存的模型文件(.pth)的路径。
    其中checkpoint = torch.load(args.resume)是用来导入已训练好的模型。
    model.load_state_dict(checkpoint[‘state_dict’])是完成导入模型的参数初始化model这个网络的过程,load_state_dict是torch.nn.Module类中重要的方法之一。

    """
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): prints args.evaluate rather than args.resume --
            # looks like a copy-paste slip; confirm intent.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames per snippet depends on modality.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    """
    接下来是main函数中的第二部分:数据导入。首先是自定义的TSNDataSet类用来处理最原始的数据,返回的是torch.utils.data.Dataset类型,
    一般而言在PyTorch中自定义的数据读取类都要继承torch.utils.data.Dataset这个基类,比如此处的TSNDataSet类,然后通过重写初始化函数__init__和__getitem__方法来读取数据。
    torch.utils.data.Dataset类型的数据并不能作为模型的输入,还要通过torch.utils.data.DataLoader类进一步封装,
    这是因为数据读取类TSNDataSet返回两个值,第一个值是Tensor类型的数据,第二个值是int型的标签,
    而torch.utils.data.DataLoader类是将batch size个数据和标签分别封装成一个Tensor,从而组成一个长度为2的list。
    对于torch.utils.data.DataLoader类而言,最重要的输入就是TSNDataSet类的初始化结果,其他如batch size和shuffle参数是常用的。通过这两个类读取和封装数据,后续再转为Variable就能作为模型的输入了。

    """

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=3,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=3,
                                             pin_memory=True)
    """
    接下来就是main函数的第三部分:训练模型。这里包括定义损失函数、优化函数、一些超参数设置等,然后训练模型并在指定epoch验证和保存模型。
    adjust_learning_rate(optimizer, epoch, args.lr_steps)是设置学习率变化策略,args.lr_steps是一个列表,里面的值表示到达多少个epoch的时候要改变学习率,
    在adjust_learning_rate函数中,默认是修改学习率的时候修改成当前的0.1倍。
    train(train_loader, model, criterion, optimizer, epoch)就是训练模型,输入包含训练数据、模型、损失函数、优化函数和要训练多少个epoch。
    最后的if语句是当训练epoch到达指定值的时候就进行一次模型验证和模型保存,args.eval_freq这个参数就是用来控制保存的epoch值。
    prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader))就是用训练好的模型验证测试数据集。
    最后的save_checkpoint函数就是保存模型参数(model)和其他一些信息,这里我对源代码做了修改,希望有助于理解,该函数中主要就是调用torch.save(mode, save_path)来保存模型。
    模型训练函数train和模型验证函数validate函数是重点,后面详细介绍。

    """
    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    '''
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    '''
    # try Adam instead.
    optimizer = torch.optim.Adam(policies, args.lr)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
コード例 #19
0
# Dataset name -> class count for this evaluation script.
if args.dataset == 'ucf101':
    num_class = 101
elif args.dataset == 'hmdb51':
    num_class = 51
elif args.dataset == 'kinetics':
    num_class = 400
elif args.dataset == 'meitu':
    num_class = 50
else:
    raise ValueError('Unknown dataset ' + args.dataset)

# Single-segment TSN; before_softmax semantics assumed from the flag name --
# confirm against the TSN definition.
net = TSN(num_class,
          1,
          args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          before_softmax=True,
          dropout=args.dropout)

checkpoint = torch.load(args.weights)
print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                              checkpoint['best_prec1']))

# Drop the leading component of each checkpoint key before loading.
base_dict = {
    '.'.join(k.split('.')[1:]): v
    for k, v in list(checkpoint['state_dict'].items())
}

net.load_state_dict(base_dict)
コード例 #20
0
def main():
    """Visualization entry: load a TSN checkpoint and run validate() over val data."""
    global args, best_prec1, class_to_name
    parser.add_argument('--class_index', type=str, help='class index file')
    args = parser.parse_args()

    # Dataset name -> class count.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'something':
        num_class = 174
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Build class-id -> name map. 'something' lists one name per line;
    # the other datasets use "<id> <name>" lines with ids shifted by -1.
    if args.dataset == 'something':
        img_prefix = ''
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        class_to_name = {
            idx: line.strip().replace(' ', '-')
            for idx, line in enumerate(content)
        }
    else:
        img_prefix = 'image_'
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        class_to_name = {int(line.strip().split(' ')[0])-1:line.strip().split(' ')[1] \
                for line in content}

    # Persist the run options for reproducibility.
    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)
    if not (args.consensus_type == 'lstm'
            or args.consensus_type == 'conv_lstm'):
        args.lstm_out_type = None
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                get_att_maps=True,
                dataset=args.dataset)

    # Preprocessing parameters and optimizer policies come from the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()
    # print(model)
    # input('...')

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # print(model)
            # NOTE(review): prints args.evaluate rather than args.resume --
            # looks like a copy-paste slip; confirm intent.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
            # input('...')
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
        rev_normalize = ReverseGroupNormalize(input_mean, input_std)
    else:
        # NOTE(review): this branch leaves rev_normalize undefined, yet
        # rev_normalize is passed to validate() below -- NameError when
        # modality == 'RGBDiff'. Confirm and initialize a fallback.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10
        # data_length = 5
    # NOTE(review): data_length stays undefined for any other modality value.

    # Optional temporal perturbation of validation frames.
    if args.val_reverse:
        val_temp_transform = ReverseFrames(size=data_length *
                                           args.num_segments)
        print('using reverse val')
    elif args.val_shuffle:
        val_temp_transform = ShuffleFrames(size=data_length *
                                           args.num_segments)
        print('using shuffle val')
    else:
        val_temp_transform = IdentityTransform()
        print('using normal val')
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=img_prefix + "{:05d}.jpg" if args.modality
            in ["RGB", "RGBDiff"] else args.flow_prefix + "{}_{:05d}.jpg",
            # image_tmpl="image_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
            random_shift=False,
            temp_transform=val_temp_transform,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    # val_logger = open(os.path.join(args.result_path, 'test.log'), 'w')
    print('visualizing...')
    val_logger = os.path.join(args.result_path, 'visualize.log')
    validate(val_loader,
             model,
             0,
             val_logger=val_logger,
             rev_normalize=rev_normalize)
    return
コード例 #21
0
def main():
    """Train a TSN action-recognition model and keep the best checkpoint.

    Configuration comes from the command line via ``Parse_args`` and is stored
    in the module-level ``args``; the best validation top-1 precision is
    tracked in the module-level ``best_prec1``.
    """
    global args, best_prec1
    args = Parse_args()

    log.l.info('Input command:\n ===========> python ' + ' '.join(sys.argv) +
               '  ===========>')

    # Map dataset name to its number of action classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'mm':
        num_class = 500
    elif args.dataset == 'thumos14':
        num_class = 21
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    log.l.info(
        '============= prepare the model and model\'s parameters ============='
    )

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Query model-specific preprocessing settings and optimizer policies
    # before the model is wrapped in DataParallel (which hides attributes).
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        log.l.info(
            '============== train from checkpoint (finetune mode) ================='
        )
        if os.path.isfile(args.resume):
            log.l.info(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Bug fix: log the checkpoint we actually resumed from
            # (was args.evaluate, which is unrelated to the loaded file).
            log.l.info(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            log.l.info(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    log.l.info('============== Now, loading data ... ==============\n')
    # RGBDiff inputs get no mean/std normalization here.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # One frame per segment for RGB; 5-frame stacks for Flow / RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(PerFrameData(
        args.frames_root,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        data_gap=args.data_gap,
        test_mode=False,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception expects BGR channel order and 0-255 pixel range.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.data_workers,
                                               pin_memory=True)

    # Validation uses deterministic center-crop preprocessing and no shuffling.
    val_loader = torch.utils.data.DataLoader(PerFrameData(
        args.frames_root,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        data_gap=args.data_gap,
        test_mode=True,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.data_workers,
                                             pin_memory=True)

    log.l.info(
        '================= Now, define loss function and optimizer =============='
    )
    # (Removed an unused class-weight tensor: CrossEntropyLoss below is
    # unweighted, so computing the weights had no effect.)
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        log.l.info(
            ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                group['name'], len(group['params']), group['lr_mult'],
                group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Optional sanity validation before training starts (does not return).
    if args.evaluate:
        log.l.info('Need val the data first...')
        validate(val_loader, model, criterion, 0)

    log.l.info(
        '\n\n===================> TRAIN and VAL begins <===================\n')

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
コード例 #22
0
parser.add_argument('--result', type=str, default='result')

args = parser.parse_args()


if args.dataset == 'ucf101':
    num_class = 101
elif args.dataset == 'hmdb51':
    num_class = 51
elif args.dataset == 'kinetics':
    num_class = 400
else:
    raise ValueError('Unknown dataset '+args.dataset)

net = TSN(num_class, 1, args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          dropout=args.dropout)
weights = osp.join("weights",args.weights)
checkpoint = torch.load(weights)
print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))
net.load_state_dict(checkpoint['state_dict'])
if args.modality != 'RGBDiff':
    normalize = GroupNormalize(net.input_mean, net.input_std)
else:
    normalize = IdentityTransform()
if args.test_crops == 1:
    cropping = torchvision.transforms.Compose([
        GroupScale(net.scale_size),
        GroupCenterCrop(net.input_size),
    ])
elif args.test_crops == 10:
コード例 #23
0
ファイル: test.py プロジェクト: iqDF/tsn-pytorch
def main():
    """Evaluate a trained TSN checkpoint on a test list.

    Builds the model described by the CLI options, restores weights from
    ``args.checkpoint`` (which was saved from a ``DataParallel``-wrapped
    model), and runs ``test`` over a center-cropped, normalized
    ``TSNDataSet`` loader.
    """
    parser = options()
    args = parser.parse_args()

    # Dataset name -> number of action classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'saag01':
        num_class = 2
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # One frame per segment for RGB; 5-frame stacks for Flow / RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=0.5,
                partial_bn=False)

    # Preprocessing geometry/statistics exposed by the base model; read them
    # before DataParallel wrapping hides the attributes.
    # (Removed unused locals: cropping, input_size, policies,
    # train_augmentation, start_epoch, best_prec1 — none were referenced.)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    checkpoint = torch.load(args.checkpoint)
    state_dict = checkpoint['state_dict']

    # The checkpoint keys carry the DataParallel "module." prefix, so wrap
    # the model the same way before loading.
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    model.load_state_dict(state_dict)

    test_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.test_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.img_prefix + "_{:05d}" +
        args.ext if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix +
        "_{}_{:05d}" + args.ext,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            # BNInception expects BGR channel order and 0-255 pixel range.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            GroupNormalize(input_mean, input_std),
        ]),
        custom_prefix=args.custom_prefix),
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              drop_last=True)

    ### Test ###
    test(model, test_loader, args)
コード例 #24
0
ファイル: test_models.py プロジェクト: lysh/tsn-pytorch
parser.add_argument('--flow_prefix', type=str, default='')

args = parser.parse_args()


if args.dataset == 'ucf101':
    num_class = 101
elif args.dataset == 'hmdb51':
    num_class = 51
elif args.dataset == 'kinetics':
    num_class = 400
else:
    raise ValueError('Unknown dataset '+args.dataset)

net = TSN(num_class, 1, args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          dropout=args.dropout)

checkpoint = torch.load(args.weights)
print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))

base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())}
net.load_state_dict(base_dict)

if args.test_crops == 1:
    cropping = torchvision.transforms.Compose([
        GroupScale(net.scale_size),
        GroupCenterCrop(net.input_size),
    ])
elif args.test_crops == 10:
    cropping = torchvision.transforms.Compose([
コード例 #25
0
ファイル: predict_window.py プロジェクト: vnigade/ECO-pytorch
def main():
    """Run sliding-window prediction over the validation list.

    Requires a checkpoint via ``--resume`` (the script only predicts, it
    does not train). Builds a window-based ``TSNDataSet`` and calls
    ``predict`` on it, timing the whole run.
    """
    global args
    args = parser.parse_args()

    print("------------------------------------")
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))

    # Dump the full parsed configuration for reproducibility.
    args_dict = args.__dict__
    print("------------------------------------")
    print(args.arch+" Configurations:")
    for key in args_dict.keys():
        print("- {}: {}".format(key, args_dict[key]))
    print("------------------------------------")

    # Dataset name -> (class count, frame-filename number format).
    if args.dataset == 'ucf101':
        num_class = 101
        rgb_read_format = "{:06d}.jpg" # Format for THUMOS14 videos
        # rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'hmdb51':
        num_class = 51
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'kinetics':
        num_class = 400
        rgb_read_format = "{:04d}.jpg"
    elif args.dataset == 'something':
        num_class = 174
        rgb_read_format = "{:04d}.jpg"
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.pretrained_parts, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn)

    # Preprocessing geometry/statistics exposed by the base model; read
    # before DataParallel wrapping hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    if _CUDA:
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda() # CUDA
    print_model(model)
    if not _CUDA:
        model = torch.nn.DataParallel(model) # CPU

    print("pretrained_parts: ", args.pretrained_parts)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            if _CUDA:
                checkpoint = torch.load(args.resume) # CUDA
            else:
                checkpoint = torch.load(args.resume, map_location='cpu') # CPU
            # Older checkpoints may not record the learning rate; prompt for it.
            # if not checkpoint['lr']:
            if "lr" not in checkpoint.keys():
                args.lr = input("No 'lr' attribute found in resume model, please input the 'lr' manually: ")
                args.lr = float(args.lr)
            else:
                args.lr = checkpoint['lr']
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch: {}, lr: {})"
                  .format(args.resume, checkpoint['epoch'], args.lr)))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    else:
        # Prediction without a pretrained checkpoint is meaningless; abort.
        print("Please specify the checkpoint to pretrained model")
        return

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        #input_mean = [0,0,0] #for debugging
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff inputs get no mean/std normalization here.
        normalize = IdentityTransform()

    # One frame per segment for RGB; 5-frame stacks for Flow / RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    end = time.time()
    # data_loader = torch.utils.data.DataLoader(
    dataset = TSNDataSet("", args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=args.rgb_prefix+rgb_read_format if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+rgb_read_format,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=True),
                       ToTorchFormatTensor(div=False),
                       #Stack(roll=(args.arch == 'C3DRes18') or (args.arch == 'ECO') or (args.arch == 'ECOfull') or (args.arch == 'ECO_2FC')),
                       #ToTorchFormatTensor(div=(args.arch != 'C3DRes18') and (args.arch != 'ECO') and (args.arch != 'ECOfull') and (args.arch != 'ECO_2FC')),
                       normalize,
                   ]),
                   test_mode=True,
                   window_size=_WINDOW_SIZE, window_stride=_WINDOW_STRIDE);
    # NOTE(review): the DataLoader is built but `predict` below is called on
    # the raw dataset, not the loader — presumably windows are iterated inside
    # `predict`; confirm whether data_loader is still needed.
    data_loader = torch.utils.data.DataLoader(dataset,
                      batch_size=args.batch_size, shuffle=False,
                      num_workers=args.workers, pin_memory=True,
                      collate_fn=collate_fn)

    # criterion = torch.nn.CrossEntropyLoss().cuda()
    # predict(data_loader, model, criterion, 0)
    predict(dataset, model, criterion=None, iter=0)
    # profile_model(model)
    elapsed_time = time.time() - end
    print("STATS_TOT_WINDOWS={0}, Total prediction time={1}".format(STATS_TOT_WINDOWS, elapsed_time))
    return
コード例 #26
0
# Dataset name -> number of action classes.
if args.dataset == 'ucf101':
    num_class = 101
elif args.dataset == 'hmdb51':
    num_class = 51
elif args.dataset == 'kinetics':
    num_class = 400
elif args.dataset == 'virat':
    num_class = 8
else:
    raise ValueError('Unknown dataset ' + args.dataset)

# ipdb.set_trace()
# Single-segment TSN (num_segments=1): crops are fused at test time via
# the consensus module selected by --crop_fusion_type.
net = TSN(num_class,
          1,
          args.modality,
          base_model=args.arch,
          consensus_type=args.crop_fusion_type,
          dropout=args.dropout)

# Debug aid: list every parameter with its trainability flag and shape.
param_count = 0
for n, param in net.named_parameters():
    param_count += 1
    print(param_count, n, param.requires_grad, param.size())

# Test-time cropping: a single scaled center crop, or 10-crop oversampling
# (4 corners + center, plus their flips).
if args.test_crops == 1:
    cropping = transforms.Compose(
        [GroupScale(net.scale_size),
         GroupCenterCrop(net.input_size)])
elif args.test_crops == 10:
    cropping = transforms.Compose(
        [GroupOverSample(net.input_size, net.scale_size)])
コード例 #27
0
    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'MFF', args.dataset, args.modality, args.arch,
        'segment%d' % args.num_segments,
        '%df1c' % args.num_motion
    ])

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                num_motion=args.num_motion,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn,
                dataset=args.dataset)

    # load pre trained weights
    checkpoint = torch.load(
        "pretrained_models/MFF_jester_RGBFlow_BNInception_segment4_3f1c_best.pth.tar"
    )
    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

    base_dict = {
        '.'.join(k.split('.')[2:]): v
        for k, v in list(checkpoint['state_dict'].items())[:-6]
コード例 #28
0
ei = 0
while(os.path.exists(logdir + '/%d/' % ei)):
    ei = ei + 1

#################################
# main loop
#################################

for di in range(0, args.num_experiments):
    p['logdir'] = './%s/%s/%d/%d/' % (args.logdir, expdir, ei, di)
    if(not os.path.exists(p['logdir'])):
        os.makedirs(p['logdir'])

    model = []
    model = TSN(p, dataset_train)
    model = model.cuda(device)

    optim = get_optimizer(args, model)

    max_perf_val = 0.0    
    max_perf_aux = 0.0
    for epoch in range(0, args.num_epochs):
        stats_train = process_epoch('train', epoch, p, dataloader_train, model, optim)
        stats_val = process_epoch('val', epoch, p, dataloader_val, model)
        
        perf_val = stats_val['top1.cause'] + stats_val['top1.effect']
        perf_val_aux = stats_val['top2.cause'] + stats_val['top2.effect']
        if(perf_val >= max_perf_val):
            if(perf_val_aux >= max_perf_aux):
                max_perf_val = perf_val
コード例 #29
0
def main():
    """Train a TSN model on UCF-style frame folders and keep the best checkpoint.

    Configuration comes from the module-level ``parser``; the best validation
    top-1 precision is tracked in the module-level ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Map dataset name to its number of action classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Query model-specific preprocessing settings and optimizer policies
    # before the model is wrapped in DataParallel (which hides attributes).
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Bug fix: log the checkpoint we actually resumed from
            # (was args.evaluate, which is unrelated to the loaded file).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    # RGBDiff inputs get no mean/std normalization here.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # One frame per segment for RGB; 5-frame stacks for Flow / RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception expects BGR channel order and 0-255 pixel range.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    # Validation uses deterministic center-crop preprocessing, no shuffling.
    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: validate once and exit without training.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
コード例 #30
0
parser.add_argument('--consensus_type', type=str, default='TRNmultiscale')
parser.add_argument('--weights', type=str)

args = parser.parse_args()

# Get dataset categories.
categories_file = 'pretrain/{}_categories.txt'.format(args.dataset)
categories = [line.rstrip() for line in open(categories_file, 'r').readlines()]
num_class = len(categories)

args.arch = 'InceptionV3' if args.dataset == 'moments' else 'BNInception'

# Load model.
net = TSN(2,
          args.test_segments,
          args.modality,
          base_model=args.arch,
          consensus_type=args.consensus_type,
          img_feature_dim=args.img_feature_dim, print_spec=False)

net = torch.nn.DataParallel(net)
import glob


checkpoint_names = glob.glob('checkpoint_*.pth')
save_acc_dict = {}
best_acc = 0.0
best_cp = None
torch.manual_seed(1111)
import pickle 
from tqdm import tqdm
for checkpoint_name in tqdm(checkpoint_names):
コード例 #31
0
def main():
    """Load an ECO checkpoint, patch missing weights, and print predictions.

    Builds a 4-class, RGB-only TSN model with an ECO backbone, restores
    weights from ``args.model_path`` (initializing any keys the checkpoint
    lacks), then iterates the validation loader printing the top-1
    prediction next to each target label.
    """
    # NOTE(review): best_prec1 is declared global but never used in this body.
    global args, best_prec1
    num_class = 4
    # Frame files are named "<index>.jpg" with no zero padding.
    rgb_read_format = "{:d}.jpg"

    model = TSN(num_class,
                args.num_segments,
                args.pretrained_parts,
                'RGB',
                base_model='ECO',
                consensus_type='identity',
                dropout=0.3,
                partial_bn=True)

    # Preprocessing geometry/statistics exposed by the base model; read
    # before DataParallel wrapping hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Optimizer s also support specifying per-parameter options.
    # To do this, pass in an iterable of dict s.
    # Each of them will define a separate parameter group,
    # and should contain a params key, containing a list of parameters belonging to it.
    # Other keys should match the keyword arguments accepted by the optimizers,
    # and will be used as optimization options for this group.
    policies = model.get_optim_policies()

    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()

    model_dict = model.state_dict()

    print("pretrained_parts: ", args.pretrained_parts)

    model_dir = args.model_path
    new_state_dict = torch.load(model_dir)['state_dict']

    # Keys present in the model but absent from the checkpoint must be
    # filled in manually before load_state_dict can succeed.
    un_init_dict_keys = [
        k for k in model_dict.keys() if k not in new_state_dict
    ]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")

    # Initialization scheme for the missing keys: BN weights -> 1,
    # other weights -> Xavier uniform, biases -> 0.
    for k in un_init_dict_keys:
        new_state_dict[k] = torch.DoubleTensor(model_dict[k].size()).zero_()
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)

    print("------------------------------------")

    model.load_state_dict(new_state_dict)

    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)
    # Single RGB frame per segment.
    data_length = 1

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality='RGB',
        image_tmpl=rgb_read_format,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=True),
            ToTorchFormatTensor(div=False),
            normalize,
        ])),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True)

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # Inference loop: print predicted class index alongside the target.
    model.eval()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda()
        input_var = input
        target_var = target
        output = model(input_var)
        # topk(1, ...) returns the single highest-scoring class per sample.
        _, pred = output.data.topk(1, 1, True, True)
        print(pred, target)
    print('done')
コード例 #32
0
ファイル: main.py プロジェクト: lysh/tsn-pytorch
def main():
    """Entry point: build a TSN model, optionally resume from a checkpoint,
    then train with periodic validation and best-model checkpointing.

    Reads all configuration from the module-level ``parser`` and mutates the
    module-level globals ``args`` and ``best_prec1``.

    Raises:
        ValueError: if ``args.dataset``, ``args.modality``, or
            ``args.loss_type`` is not one of the supported values.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Map dataset name to its number of action classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn)

    # Capture preprocessing metadata and optimizer policies BEFORE wrapping
    # in DataParallel, which hides these attributes behind ``.module``.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Bug fix: the original printed args.evaluate here, not the
            # checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code. RGBDiff frames are already differenced, so they
    # skip mean/std normalization.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Number of consecutive frames stacked per segment sample.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously fell through silently and crashed later with an
        # unbound ``data_length``; fail fast with a clear message instead.
        raise ValueError('Unknown modality ' + args.modality)

    # NOTE(review): roll/div are keyed on BNInception — presumably because
    # that backbone expects BGR, non-normalized input; confirm against the
    # pretrained weights' preprocessing.
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Validation uses deterministic sampling (random_shift=False) and a
    # center crop instead of the training augmentation.
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

    # Per-group lr_mult/decay_mult come from TSN.get_optim_policies().
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)