Example #1
def model_process(count, model):
    opt = parse_opts()

    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    #opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    #print(opt)
    #print(opt.result_path)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)

    #print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    print('running test')

    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()

        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)

        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)

        tester.test(count, test_loader, model, opt, test_data.class_names)
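The scale list built near the top of model_process is a geometric progression. A minimal standalone sketch of the same construction, assuming example values for initial_scale, scale_step, and n_scales (the real values come from parse_opts):

# Hypothetical values; the actual ones are parsed from the command line.
initial_scale, scale_step, n_scales = 1.0, 0.84, 5
scales = [initial_scale * scale_step ** i for i in range(n_scales)]
# Equivalent to the loop above: each scale is the previous one times scale_step.
assert abs(scales[2] - initial_scale * scale_step ** 2) < 1e-9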
Example #2
    print('run')
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            train_epoch(i, train_loader, model, criterion, optimizer, opt,
                        train_logger, train_batch_logger)
        if not opt.no_val:
            validation_loss = val_epoch(i, val_loader, model, criterion, opt,
                                        val_logger)

        if not opt.no_train and not opt.no_val:
            scheduler.step(validation_loss)

    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()

        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        test.test(test_loader, model, opt, test_data.class_names)
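The loop above steps the scheduler on the validation loss, which is the ReduceLROnPlateau pattern; the optimizer and scheduler construction sit outside this excerpt. A minimal sketch, assuming a stand-in model and SGD optimizer:

from torch import nn, optim

model = nn.Linear(8, 2)                            # stand-in model for illustration
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                 factor=0.1, patience=10)
for epoch in range(1, 4):
    val_loss = 1.0 / epoch                         # stand-in for val_epoch's return value
    scheduler.step(val_loss)                       # lr drops only once the loss plateaus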
Example #3
        begin_epoch = 1
    print('run')
    for i in range(begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            train_epoch(i, train_loader, model, criterion, optimizer,
                        arch_optimizer, opt, train_logger, train_batch_logger)
        if not opt.no_val:
            validation_loss = val_epoch(i, val_loader, model, criterion, opt,
                                        val_logger)

        if not opt.no_train and not opt.no_val:
            scheduler.step(validation_loss)

    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(), norm_method
        ])
        temporal_transform = TemporalSampling(opt.sample_duration)
        target_transform = TargetCompose([VideoID(), ClassLabel()])

        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=True,  # note: test loaders are usually not shuffled
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        test.test(test_loader, model, criterion, opt)
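TargetCompose lets one sample carry several targets at once, here the video ID and the class label, so the test routine can both score accuracy and report per-video results. A sketch of how such a compose typically works in these codebases (an assumption, not necessarily the repository's verbatim implementation):

class TargetCompose:
    """Apply every target transform and collect the results in a list."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, target):
        return [t(target) for t in self.transforms]

# e.g. TargetCompose([VideoID(), ClassLabel()]) maps an annotation to [video_id, label]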
Example #4
                ToTensor(opt.norm_value),
                norm_method
            ])
            temporal_transform = TemporalCenterCrop(opt.sample_duration, 1)
        elif opt.model in [
                'I3D_BSL_part', 'I3D_BSL_face', 'I3D_BSL_lhand',
                'I3D_BSL_rhand'
        ]:
            spatial_transform = Compose([
                Scale((256, 256)),
                CenterCrop(224),
                ToTensor(opt.norm_value), norm_method
            ])
            temporal_transform = LoopPadding(64)

        target_transform = TargetCompose([ClassLabel(), VideoID()])

        if opt.model.endswith('flow'):
            test_data = get_test_set(opt,
                                     spatial_transform,
                                     temporal_transform,
                                     target_transform,
                                     modality='flow')
        elif opt.model.endswith('pose'):
            test_data = get_test_set(opt,
                                     spatial_transform,
                                     temporal_transform,
                                     target_transform,
                                     modality='pose')
        elif opt.model.endswith('depth'):
Example #5
def main(args):

    import os
    import numpy as np
    import sys
    import json
    import torch
    from torch import nn
    from torch import optim
    from torch.optim import lr_scheduler

    from opts import parse_opts
    from mean import get_mean, get_std
    from spatial_transforms import (
        Compose, Normalize, Scale, CenterCrop, CornerCrop, MultiScaleCornerCrop,
        MultiScaleRandomCrop, RandomHorizontalFlip, ToTensor)
    from temporal_transforms import LoopPadding, TemporalRandomCrop
    from target_transforms import ClassLabel, VideoID
    from target_transforms import Compose as TargetCompose
    from dataset import get_training_set, get_validation_set, get_test_set
    from utils import Logger
    from train import train_epoch
    from validation import val_epoch
    import test
    import collections
    from sklearn.svm import LinearSVC
    from sklearn.svm import SVC
    from joblib import dump, load
    from sklearn import preprocessing
    from scipy import stats
    from sklearn.metrics import accuracy_score


    local_path = os.getcwd()

    if args.video_directory_path in ["", " ", "./video", "./video/"]:
        video_path = local_path + '/video/'
    else:
        video_path = args.video_directory_path
        
    video_path_jpg = local_path + '/video_jpg/'


    if not os.path.exists(video_path_jpg):
        os.makedirs(video_path_jpg)

    extracted_feature_path = local_path + '/extracted_features'
    if not os.path.exists(extracted_feature_path):
        os.makedirs(extracted_feature_path)

    final_results_path = local_path + '/final_test_results'
    if not os.path.exists(final_results_path):
        os.makedirs(final_results_path)

    os.system('python utils/video_jpg.py' + ' ' + video_path + ' ' + video_path_jpg)
    os.system('python utils/n_frames.py' + ' ' + video_path_jpg)


    if args.pretrain_directory_path in ["", " ", '', './pretrain', './pretrain/']:
        pretrain_directory_path = local_path + '/pretrain'
    else:
        pretrain_directory_path = args.pretrain_directory_path


    import easydict
    opt = easydict.EasyDict({
        "n_classes": 2, 
        "sample_size": 112,
        "sample_duration": 16,
        "batch_size": 16,
        "n_threads": 4,
        "norm_value": 1,
        "resnet_shortcut": 'B',
        "resnext_cardinality": 32,
    })
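    # These values are assumed to match the Kinetics-pretrained checkpoints loaded
    # below (112x112 crops, 16-frame clips, shortcut type 'B', cardinality 32).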
    opt.root_path =  local_path
    opt.video_path = video_path_jpg




    # expose one GPU device to the process; customize this depending on how many GPUs are available
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'



    from datasets.no_label_binary import NoLabelBinary

    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1,1,1])


    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])

    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = VideoID() # ClassLabel()



    # get test data
    test_data = NoLabelBinary(
        opt.video_path,
        None,
        'testing',
        0,
        spatial_transform=spatial_transform,
        temporal_transform=temporal_transform,
        target_transform=target_transform,
        sample_duration=opt.sample_duration)


    # wrap test data
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False)


    # ### Extract Features

    # ##### 3D ResNeXt-101


    from models import resnext

    # construct model architecture
    model_rxt101 = resnext.resnet101(
                    num_classes=opt.n_classes,
                    shortcut_type=opt.resnet_shortcut,
                    cardinality=opt.resnext_cardinality,
                    sample_size=opt.sample_size,
                    sample_duration=opt.sample_duration)

    model_rxt101 = model_rxt101.cuda()
    # wrap the model in nn.DataParallel so its state_dict keys carry the same
    # 'module.' prefix as the checkpoint (alternatively, strip that prefix from the checkpoint keys)
    model_rxt101 = nn.DataParallel(model_rxt101, device_ids=None)


    ### Load pretrained weight
    # customize the pretrained model path
    pretrain = torch.load(pretrain_directory_path + '/resnext-101-kinetics.pth')
    pretrain_dict = pretrain['state_dict']

    # do not load the last layer since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rxt101.state_dict()
    model_dict.update(pretrain_dict) 
    model_rxt101.load_state_dict(model_dict)




    # register layer index to extract the features by forwarding all the video clips
    activation = {}
    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rxt101.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rxt101.eval()


    # forward all the videos to extract features
    avgpool_test = []
    targets_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rxt101(inputs)
            avgpool_test.append(activation['avgpool'].view(len(target), -1).cpu())
            targets_test.append(target)



    avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test], axis=0)
    np.save(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy', avgpool_test_np)

    targets_test_np = np.concatenate(targets_test, axis=0)  # batches may be ragged, so skip the np.array wrapper
    np.save(opt.root_path + '/extracted_features/class_names_test.npy', targets_test_np)


    # ##### 3D ResNet-50


    from models import resnet

    # construct model architecture
    model_rt50 = resnet.resnet50(
                    num_classes=opt.n_classes,
                    shortcut_type=opt.resnet_shortcut,
                    sample_size=opt.sample_size,
                    sample_duration=opt.sample_duration)

    model_rt50 = model_rt50.cuda()
    # wrap the model in nn.DataParallel so its state_dict keys carry the same
    # 'module.' prefix as the checkpoint (alternatively, strip that prefix from the checkpoint keys)
    model_rt50 = nn.DataParallel(model_rt50, device_ids=None)


    ### Load pretrained weight
    # customize the pretrained model path
    pretrain = torch.load(pretrain_directory_path + '/resnet-50-kinetics.pth')
    pretrain_dict = pretrain['state_dict']

    # do not load the last layer since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rt50.state_dict()
    model_dict.update(pretrain_dict) 
    model_rt50.load_state_dict(model_dict)




    # register layer index to extract the features by forwarding all the video clips
    activation = {}
    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rt50.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rt50.eval()


    # forward all the videos to extract features
    avgpool_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rt50(inputs)
            avgpool_test.append(activation['avgpool'].view(len(target), -1).cpu())
            
        # save the features
        avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test], axis=0)
        np.save(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy', avgpool_test_np)    


    # ### Load & fuse the features


    x_test_1 = np.load(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy')
    x_test_2 = np.load(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy')
    x_test = np.concatenate([x_test_1, x_test_2], axis=1)

    y_test = np.load(opt.root_path + '/extracted_features/class_names_test.npy')


    # ### Load Classification head and predict

    if args.model == 'hw4':
        # hw4 best model
        clf = load('./hw6_results/logistic2_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test_2)
        y_pred_test_prob_raw = clf.predict_proba(x_test_2)

    elif args.model == 'hw5':
        # hw5 best model
        clf = load('./hw6_results/logistic_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)

    elif args.model == 'hw6':
        # hw6 best model
        clf = load('./hw6_results/logistic1_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)

    elif args.model == 'hw8':
        # hw8 best model
        clf = load('./hw8_results/logistic_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)

    elif args.model == 'final':
        # Final best model
        clf = load('./hw8_results/logistic1_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)



    split_idx = []
    for idx, y_name in enumerate(y_test):
        if idx == 0 or y_name != y_test[idx-1]:
            split_idx.append(idx)
    split_idx.append(len(y_test))
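    # Group clip-level predictions by video: each contiguous run of identical IDs
    # in y_test is one video, and the video-level label is the argmax of the mean
    # clip probabilities (late fusion by probability averaging).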
            
    y_pred_test, y_pred_test_prob, y_pred_test_final = {}, {}, {}
    for i, split in enumerate(split_idx):
        if i < len(split_idx) - 1:
            y_pred_test[y_test[split]] = y_pred_test_raw[split:split_idx[i+1]]
            y_pred_test_prob[y_test[split]] = y_pred_test_prob_raw[split:split_idx[i+1]]
            y_pred_test_final[y_test[split]] = np.argmax(np.mean(y_pred_test_prob_raw[split:split_idx[i+1]], axis=0))   


    # ### Get the length (in seconds) of each video clip


    tvns = list(y_pred_test_final.keys())
    mp4_path = video_path
    clip_duration_dict = {}

    from moviepy.editor import VideoFileClip
    i = 0
    for tvn in tvns:
        i += 1
        if i % 100 == 0:
            print(i)
        clip = VideoFileClip(os.path.join(mp4_path, tvn + ".mp4"))
        clip_duration_dict[tvn] = [clip.duration]


    # ### Generate Figures
    import matplotlib.pyplot as plt
    for tvn in clip_duration_dict:
        interval = clip_duration_dict[tvn][0]/list(y_test).count(tvn)
        x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
        y_idx = np.argmax(y_pred_test_prob[tvn], 1)  # computed but unused below
        y = y_pred_test_prob[tvn][:, 1]
        x = x[:len(y)]
        plt.plot(x, y)
        plt.ylim([-0.1, 1.1])
        plt.xlabel('time/sec')
        plt.ylabel('pred score for ground truth label')
        plt.title("Ground Truth Label:  " + tvn  + "\n Model Avg. Predict Score:  " + str(np.mean(y))) # str(real_prediction_dict[tvn]['score'])
        plt.savefig(opt.root_path + "/final_test_results/" + tvn + '_' + args.model + "_UIN-625007598", bbox_inches='tight')
        plt.close()


    # ### Generate Json
    timeTrueLabel = {}
    for tvn in clip_duration_dict:
        if tvn in y_pred_test_prob:
            interval = clip_duration_dict[tvn][0]/list(y_test).count(tvn)
            x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
            y_idx = np.argmax(y_pred_test_prob[tvn], 1)  # computed but unused below
            y = y_pred_test_prob[tvn][:, 1]
            x = x[:len(y)]  
            timeTrueLabel[tvn] = [[str(time), str(y[idx])] for idx, time in enumerate(x)]



    with open(opt.root_path + '/final_test_results/timeLabel_' + args.model + '_UIN-625007598.json', 'w') as fp:
        json.dump(timeTrueLabel, fp)
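A hedged sketch of a driver for main above. The argument names mirror the attributes the function reads (video_directory_path, pretrain_directory_path, model); the defaults are assumptions:

# Hypothetical entry point covering only the attributes main() actually uses.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--video_directory_path', default='./video/')
    parser.add_argument('--pretrain_directory_path', default='./pretrain')
    parser.add_argument('--model', default='final',
                        choices=['hw4', 'hw5', 'hw6', 'hw8', 'final'])
    main(parser.parse_args())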
Example #6
def get_ucf_data(opt):

    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1, 1, 1])

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])

    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()  # VideoID()

    # get training data
    training_data = UCF101(opt.video_path,
                           opt.annotation_path,
                           'training',
                           0,
                           spatial_transform=spatial_transform,
                           temporal_transform=temporal_transform,
                           target_transform=target_transform,
                           sample_duration=16)

    # wrap training data
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=False,
                                               num_workers=opt.n_threads,
                                               pin_memory=False)  # True

    # get validation data
    val_data = UCF101(opt.video_path,
                      opt.annotation_path,
                      'validation',
                      0,
                      spatial_transform=spatial_transform,
                      temporal_transform=temporal_transform,
                      target_transform=target_transform,
                      sample_duration=16)

    # wrap validation data
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=False)

    target_transform = VideoID()
    # get test data
    test_data = UCF101(opt.video_path,
                       opt.annotation_path,
                       'testing',
                       0,
                       spatial_transform=spatial_transform,
                       temporal_transform=temporal_transform,
                       target_transform=target_transform,
                       sample_duration=16)

    # wrap test data
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=False)

    return train_loader, val_loader, test_loader, test_data
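A minimal usage sketch for get_ucf_data, assuming an opt namespace carrying the fields the function reads; the dataset paths are placeholders:

import easydict  # an argparse.Namespace or types.SimpleNamespace works the same way

opt = easydict.EasyDict({
    'video_path': '/data/ucf101_jpg',            # placeholder path
    'annotation_path': '/data/ucf101_01.json',   # placeholder path
    'sample_size': 112, 'sample_duration': 16,
    'batch_size': 16, 'n_threads': 4, 'norm_value': 1,
})
train_loader, val_loader, test_loader, test_data = get_ucf_data(opt)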
Example #7
        common_temporal_transform = LoopPadding(opt.sample_duration)
        common_spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            RGB2Gray(),
        ])
        target_spatial_transform = Compose([
            ToTensor(opt.norm_value),
            norm_method,
        ])
        input_spatial_transform = Compose([
            LowResolution(opt.spatial_compress_size, use_cv2=opt.use_cv2),
            ToTensor(opt.norm_value),
            norm_method,
        ])
        target_label_transform = VideoID()

        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            RGB2Gray(),
            ToTensor(opt.norm_value),
            norm_method,
        ])

        test_data = get_test_set(opt, common_temporal_transform,
                                 common_spatial_transform,
                                 target_spatial_transform,
                                 input_spatial_transform,
                                 target_label_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
Example #8
def objective(trial):
    opt = parse_opts()

    if trial:
        opt.weight_decay = trial.suggest_uniform('weight_decay', 0.01, 0.1)
        opt.learning_rate = trial.suggest_uniform('learning_rate', 1e-5, 1e-4)

    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)

    model, parameters = generate_model(opt)
    print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    # norm_method = Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(
            training_data,
            batch_size=opt.batch_size,
            # sampler option is mutually exclusive with shuffle
            shuffle=False,
            sampler=ImbalancedDatasetSampler(training_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(opt.result_path, 'train_batch.log'),
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

        optimizer = optim.Adam(parameters,
                               lr=opt.learning_rate,
                               weight_decay=opt.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               verbose=True,
                                                               factor=0.1**0.5)
    if not opt.no_val:
        spatial_transform = Compose([
            Scale(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(
            validation_data,
            batch_size=opt.batch_size,
            shuffle=False,
            sampler=ImbalancedDatasetSampler(validation_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                            ['epoch', 'loss', 'acc'])

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])

    print('run')
    writer = SummaryWriter(
        comment=
        f"_wd{opt.weight_decay}_lr{opt.learning_rate}_ft_begin{opt.ft_begin_index}_pretrain{not opt.pretrain_path == ''}"
    )
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            epoch, losses_avg, accuracies_avg = train_epoch(
                i, train_loader, model, criterion, optimizer, opt,
                train_logger, train_batch_logger)
            writer.add_scalar('loss/train', losses_avg, epoch)
            writer.add_scalar('acc/train', accuracies_avg, epoch)

        if not opt.no_val:
            epoch, val_losses_avg, val_accuracies_avg = val_epoch(
                i, val_loader, model, criterion, opt, val_logger)
            writer.add_scalar('loss/val', val_losses_avg, epoch)
            writer.add_scalar('acc/val', val_accuracies_avg, epoch)

        if not opt.no_train and not opt.no_val:
            scheduler.step(val_losses_avg)
        print('=' * 100)

    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()

        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        test.test(test_loader, model, opt, test_data.class_names)

    writer.close()
    return val_losses_avg
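Because objective returns the final validation loss, it plugs directly into an Optuna study. A minimal sketch (the trial count is arbitrary; newer Optuna versions prefer suggest_float over the deprecated suggest_uniform):

import optuna

study = optuna.create_study(direction='minimize')  # minimize validation loss
study.optimize(objective, n_trials=20)             # trial count is an assumption
print(study.best_params, study.best_value)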