Example #1
def extract_feature(opt, video_dir, C3D_model):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    load_image_fn = None
    data = Video(opt, video_dir, load_image_fn,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size,
                                              shuffle=False, num_workers=opt.n_threads, pin_memory=True)

    c3d_features = []
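    # each batch from the loader is one stacked clip tensor (typically batch x channels x frames x height x width) fed straight to the C3D model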
    for i, clip in enumerate(data_loader):

        print(clip.mean())

        ## c3d feats
        clip = clip.to(opt.device)
        with torch.no_grad():
            c3d_outputs = C3D_model(clip)

        # aggregate the per-clip outputs
        c3d_features.append(c3d_outputs.cpu().data) # torch.Size([8, 512, 14, 14])

    c3d_features = torch.cat(c3d_features, 0)  # c3d feature of one video


    return c3d_features.cpu().numpy()
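A minimal, hypothetical driver for the extractor above. It only sets the option fields the function itself reads (mode, sample_size, mean, sample_duration, batch_size, n_threads, device); the Video dataset may expect additional fields on opt, and load_pretrained_c3d, the frame directory, and the channel means are placeholders rather than part of the original code.

from argparse import Namespace

import numpy as np
import torch

opt = Namespace(mode='feature', sample_size=112, sample_duration=16,
                mean=[114.77, 107.74, 99.48],  # illustrative channel means
                batch_size=8, n_threads=4,
                device='cuda' if torch.cuda.is_available() else 'cpu')

C3D_model = load_pretrained_c3d().to(opt.device).eval()  # hypothetical loader for a pretrained C3D backbone
features = extract_feature(opt, 'frames/video_0001', C3D_model)
np.save('video_0001_c3d.npy', features)  # (n_clips, 512, 14, 14), per the shape noted in the loop above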
def classify_video(video_dir, video_name, class_names, model, opt):
    # print("video_dir: {}, video_name: {}".format(video_dir,video_name));
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size,
                                              shuffle=False, num_workers=opt.n_threads, pin_memory=True)

    video_outputs = []
    # video_segments = []
    for i, (inputs, segments) in enumerate(data_loader):
        with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True) pattern
            outputs = model(inputs)

        video_outputs.append(outputs.cpu().data)
        # video_segments.append(segments)

    if len(video_outputs) != 0:
        video_outputs = torch.cat(video_outputs)
        return video_outputs.numpy()
    else:
        return None
Example #3
def classify_video(video_dir,
                   video_name,
                   class_names,
                   model,
                   opt,
                   annotation_digit=5):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    print('reading file from: ', video_dir, 'file name: ', video_name)

    video_outputs = []
    video_segments = []
    for i, (inputs, segments) in enumerate(data_loader):
        with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True) pattern
            outputs = model(inputs)

        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}

    _, max_indices = video_outputs.max(dim=1)
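    # max_indices holds each clip's argmax over dim 1; it is used as the predicted label in 'score' mode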
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }

        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
            clip_results['ground_truth_annotaion'] = annotation_digit

        results['clips'].append(clip_results)

    return results
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']
    print('video_name:', video_name)
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []
    print('Running on video', video_dir)

    #print ('Data loader size', len(data_loader))
    for i, (inputs, segments) in enumerate(data_loader):
        print(i, inputs.size(), segments.shape)
        with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True) pattern
            outputs = model(inputs)

        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    #print('Video outputs and segments', video_outputs)
    results = {'video': video_name, 'clips': []}
    if len(video_outputs) > 0:
        print('Video outputs and segments: ', video_outputs[0].shape)

        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)

        _, max_indices = video_outputs.max(dim=1)
        print('Video outputs', video_outputs.size())
        for i in range(video_outputs.size(0)):
            clip_results = {
                'segment': video_segments[i].tolist(),
            }

            if opt.mode == 'score':
                clip_results['label'] = class_names[max_indices[i]]
                clip_results['scores'] = video_outputs[i].tolist()
            elif opt.mode == 'feature':
                clip_results['features'] = video_outputs[i].tolist()

            results['clips'].append(clip_results)

    return results
Example #5
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration,
                 stride=opt.stride)
    data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size,
                                              shuffle=False, num_workers=opt.n_threads, pin_memory=True)

    video_outputs = []
    video_segments = []
    for i, (inputs, segments) in enumerate(data_loader):
        with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True) pattern
            outputs = model(inputs)

        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    if len(video_outputs) == 0:
        with open("error.list", 'a') as fout:
            fout.write("{}\n".format(video_name))
        return {}

    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)

    results = {
        'video': video_name,
        'clips': []
    }

    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }

        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()

        results['clips'].append(clip_results)

    return results
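For reference, a short hypothetical call site for the variant above that serializes the returned dict; class_names, model, opt, and both paths are placeholders.

import json

result = classify_video('frames/video_0001', 'video_0001.mp4', class_names, model, opt)
if result:  # an empty dict means no clips were produced and the name was appended to error.list
    with open('video_0001_clips.json', 'w') as f:
        json.dump(result, f)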
Example #6
def extract_feature(opt, video_dir, C3D_model, load_image_fn, C2D_model,
                    c2d_shape, duration):
    assert opt.mode in ['score', 'feature']
    C, H, W = c2d_shape

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)

    opt.num_segments = max(int(duration / opt.clip_len), 1)
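    # i.e. roughly one segment per clip_len units of duration, never fewer than one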
    data = Video(opt,
                 video_dir,
                 load_image_fn,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=True)

    c3d_features = []
    c2d_features = []
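    # each batch carries a stacked clip for the 3-D CNN plus the corresponding raw frames for the 2-D CNN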
    for i, (clip, frames_npy_data) in enumerate(data_loader):

        ## c3d feats
        clip = clip.to(opt.device)
        with torch.no_grad():
            c3d_outputs = C3D_model(clip)

        frames = frames_npy_data.to(opt.device)
        with torch.no_grad():
            c2d_outputs = C2D_model(frames).squeeze()
            if len(c2d_outputs.shape) == 1:
                c2d_outputs = c2d_outputs.unsqueeze(0)

        # aggregate the per-clip outputs
        c3d_features.append(c3d_outputs.cpu().data)
        c2d_features.append(c2d_outputs.cpu().data)

    try:
        c3d_features = torch.cat(c3d_features)  # c3d features of one video
        c2d_features = torch.cat(c2d_features)  # c2d features of one video
    except RuntimeError:  # torch.cat fails when no clips were extracted
        return None, None

    return c3d_features.cpu().numpy(), c2d_features.cpu().numpy()
Example #7
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []

    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):
            inputs = Variable(inputs)
            outputs = model(inputs)

            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)

        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)
        results = {'video': video_name, 'clips': []}

        mypath = 'features/' + video_name.split('.')[0] + '/'
        os.makedirs(mypath, exist_ok=True)  # avoid failing when the directory already exists

        _, max_indices = video_outputs.max(dim=1)
        for i in range(video_outputs.size(0)):

            with open(mypath + str(i) + '.txt', 'w+') as f:

                f.write(' '.join(map(str, video_outputs[i].tolist())))

        return results
Example #8
def classify_video(video_dir, video_name, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []

    for i, (inputs, segments) in enumerate(data_loader):
        with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True) pattern
            outputs = model(inputs)
        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    # results = {
    #     'video': video_name,
    #     'clips': []
    # }
    clips = []
    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }

        clip_results['features'] = video_outputs[i].tolist()
        clips.append(clip_results)

    return video_name, clips
def classify_video(video_dir, video_name, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=False)

    video_outputs = []
    video_segments = []

    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):

            inputs = Variable(inputs)

            outputs = model(inputs)

            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)

    if video_outputs:
        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)

    results = dict()
    results['video'] = video_name
    results['features'] = video_outputs
    results['clips'] = video_segments

    return results
Example #10
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode == 'feature'

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []
    with torch.no_grad():

        for i, (inputs, segments) in enumerate(data_loader):
            inputs = Variable(inputs)
            outputs = model(inputs)

            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)
    # video_segments = torch.cat(video_segments)
    results = []

    for i in range(video_outputs.size(0)):
        clip_results = np.expand_dims(video_outputs[i].numpy(), axis=0)

        results.append(clip_results)
    results = np.concatenate(results, axis=0)
    return results
Example #11
    cls_acc = cls_hit / cls_cnt
    print(cls_acc)
    print('Accuracy {:.02f}%'.format(np.mean(cls_acc) * 100))
    with open(opt.result_path, 'w') as f:
        json.dump(test_results, f)

if __name__ == '__main__':
    opt = parse_opts()
    opt.mean = get_mean()
    opt.arch = '{}-{}'.format(opt.model_name, opt.model_depth)
    opt.sample_duration = 16
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(1),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(opt.val_list, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration, n_samples_for_each_video=0)
    data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size,
                                              shuffle=False, num_workers=opt.n_threads, pin_memory=True)
    model, _ = generate_model(opt)
    model = nn.DataParallel(model, device_ids=opt.gpus).cuda()
    print('loading model {}'.format(opt.model))
    model_data = torch.load(opt.model)
    assert opt.arch == model_data['arch']
    model.load_state_dict(model_data['state_dict'])
    model.eval()
    test(data_loader, model, opt)

Example #12
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []
    for i, (inputs, segments) in enumerate(data_loader):
        with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True) pattern
            outputs = model(inputs)
            outputs = F.softmax(outputs, dim=1)
        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)

    video_segments = torch.cat(video_segments)

    results = {'video': video_name, 'clips': []}

    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }
        label = get_video_results(video_outputs[i], class_names, 5)
        clip_results['label'] = label
        results['clips'].append(clip_results)

#     _, max_indices = video_outputs.max(dim=1)
#     for i in range(video_outputs.size(0)):
#         clip_results = {
#             'segment': video_segments[i].tolist(),
#         }

#         if opt.mode == 'score':
#             clip_results['label'] = class_names[max_indices[i]]
#             clip_results['scores'] = video_outputs[i, max_indices[i]].item()
#         elif opt.mode == 'feature':
#             clip_results['features'] = video_outputs[i].tolist()

#         results['clips'].append(clip_results)

#     average_scores = torch.mean(video_outputs, dim=0)
#     video_results, predicted_labels = get_video_results(average_scores, class_names, 1)

#     video_results = get_video_results(average_scores, class_names, 5)
#     results = {
#         'video': video_name,
#         'result': video_results,
# #         'predicted_labels': predicted_labels
#     }
    return results
subprocess.call('ffmpeg -i {} tmp/image_%05d.jpg'.format(test_video),
                shell=True)

# In[173]:

test_results = {'results': {}}
end_time = time.time()
output_buffer = []
previous_video_id = ''
batch_time = AverageMeter(name='Meter', length=10)
data_time = AverageMeter(name='Meter', length=10)

# In[171]:

data = Video('tmp',
             spatial_transform=spatial_transform,
             temporal_transform=temporal_transform,
             sample_duration=sample_duration)

# In[172]:

data_loader = torch.utils.data.DataLoader(data,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=4,
                                          pin_memory=True)

# In[174]:

videoPath = "../dataset/{}/*".format("hmdb")
activity_classes = [i.split(os.path.sep)[3] for i in glob.glob(videoPath)]
print(activity_classes)
Example #14
    opt.sample_duration = 16
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    print('#####', opt.scales)
    print(opt.mean)
    spatial_transform = Compose([
        MultiScaleCornerCrop(opt.scales, opt.sample_size),
        RandomHorizontalFlip(),
        ToTensor(1),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = TemporalRandomCrop(opt.sample_duration)
    train_data = Video(opt.train_list,
                       spatial_transform=spatial_transform,
                       temporal_transform=temporal_transform,
                       sample_duration=opt.sample_duration,
                       n_samples_for_each_video=1)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.n_threads,
                                               pin_memory=True)

    val_spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(1),
        Normalize(opt.mean, [1, 1, 1])
    ])
    val_temporal_transform = LoopPadding(opt.sample_duration)
Example #15
def train_main_multi_batch(model, input_root_dir, opt):
    ####

    epoch_logger = logging.getLogger('info')
    batch_logger = logging.getLogger('info')

    elogHandler = logging.StreamHandler()
    eformatter = jsonlogger.JsonFormatter()
    elogHandler.setFormatter(eformatter)
    epoch_logger.addHandler(elogHandler)

    blogHandler = logging.StreamHandler()
    bformatter = jsonlogger.JsonFormatter()
    blogHandler.setFormatter(bformatter)
    batch_logger.addHandler(blogHandler)

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)

    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()

    if not opt.no_cuda:
        criterion = criterion.cuda()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    epoch = 1

    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()

    end_time = time.time()

    ii = 0

    previous_label = "FAKE"
    pre_previous_label = "FAKE"

    for files_dir in os.listdir(input_root_dir):
        sub_path = os.path.join(input_root_dir, files_dir)
        print("Files dir: " + files_dir)
        print("Sub path:" + sub_path)

        data_file_path = os.path.join(sub_path, 'metadata.json')
        with open(data_file_path, 'r') as data_file:
            labels = json.load(data_file)

        opt.batch_size = 36
        total_batch_size = len(os.listdir(sub_path))
        i = 0
        input_files = os.listdir(sub_path)
        for inp_num in range(1, len(input_files), 2):
            print("Lala: " + str(inp_num))
            # print(input_files)
            input_file1 = input_files[inp_num]
            input_file2 = input_files[inp_num - 1]
            if input_file1.endswith(".mp4") and input_file2.endswith(".mp4"):

                video_path1 = os.path.join(sub_path, input_file1)
                video_path2 = os.path.join(sub_path, input_file2)

                label1 = labels[input_file1]
                label2 = labels[input_file2]

                if label1['label'] != previous_label or label1[
                        'label'] != pre_previous_label:

                    previous_label = label1['label']

                    subprocess.call('mkdir tmp', shell=True)
                    subprocess.call(
                        'ffmpeg -hide_banner -loglevel panic -i {}  -vframes 288 tmp/image_%05d.jpg'
                        .format(video_path1),
                        shell=True)
                    subprocess.call(
                        'ffmpeg -hide_banner -loglevel panic -i {}  -vframes 288 -start_number 289 tmp/image_%05d.jpg'
                        .format(video_path2),
                        shell=True)

                    video_dir = '{}tmp/'.format(
                        '/data/codebases/video_classification/')

                    data = Video(video_dir,
                                 spatial_transform=spatial_transform,
                                 temporal_transform=temporal_transform,
                                 sample_duration=opt.sample_duration)

                    data_loader = torch.utils.data.DataLoader(
                        data,
                        batch_size=opt.batch_size,
                        shuffle=False,
                        num_workers=opt.n_threads,
                        pin_memory=True)

                    for k, (inputs, targets) in enumerate(data_loader):
                        data_time.update(time.time() - end_time)

                        print("Label: " + label1['label'] + ", " +
                              label2['label'])

                        # # FOR CROSS ENTROPY LOSS
                        # targets = torch.zeros([18, 1], dtype=torch.long)
                        # for j in range(0,18):
                        #     if(label['label'] == 'FAKE'):
                        #         targets[j][0] = 0
                        #         # targets[j][1] = 1
                        #     else:
                        #         targets[j][0] = 1
                        #         # targets[j][1] = 0

                        # FOR MSE LOSS
                        targets = torch.zeros([opt.batch_size, opt.n_classes],
                                              dtype=torch.float)
                        for j in range(0, int(opt.batch_size / 2)):
                            if (label1['label'] == 'FAKE'):
                                targets[j][0] = 0.0
                                targets[j][1] = 1.0
                            else:
                                targets[j][0] = 1.0
                                targets[j][1] = 0.0

                        for j in range(int(opt.batch_size / 2),
                                       opt.batch_size):
                            if (label2['label'] == 'FAKE'):
                                targets[j][0] = 0.0
                                targets[j][1] = 1.0
                            else:
                                targets[j][0] = 1.0
                                targets[j][1] = 0.0

                        if not opt.no_cuda:
                            targets = targets.cuda(non_blocking=True)
                        inputs = Variable(inputs)
                        targets = Variable(targets)
                        outputs = model(inputs)

                        print(outputs.t())
                        print(targets.t())

                        # FOR CROSS ENTROPY LOSS
                        # loss = criterion(outputs, torch.max(targets, 1)[1])
                        # FOR MSE LOSS
                        loss = criterion(outputs, targets)

                        print(loss)

                        # FOR CROSS ENTROPY LOSS
                        # acc = calculate_accuracy(outputs, targets)
                        # FOR MSE LOSS
                        acc = calculate_accuracy_mse(outputs, targets)

                        print(acc)

                        losses.update(loss.item(), inputs.size(0))  # loss.data[0] is the pre-0.4 PyTorch idiom
                        accuracies.update(acc, inputs.size(0))

                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                        batch_time.update(time.time() - end_time)
                        end_time = time.time()

                        batch_logger.log(
                            1, {
                                'epoch': epoch,
                                'batch': i + 1,
                                'iter': (epoch - 1) * opt.batch_size + (i + 1),
                                'loss': losses.val,
                                'acc': accuracies.val,
                                'lr': optimizer.param_groups[0]['lr']
                            })

                        print(
                            'Epoch: [{0}][{1}/{2}]\t'
                            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                            'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                            'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                            'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                                epoch,
                                i + 1,
                                opt.batch_size,
                                batch_time=batch_time,
                                data_time=data_time,
                                loss=losses,
                                acc=accuracies))
                        ii += 1
                    subprocess.call('rm -rf tmp', shell=True)
                i += 1

            if ii % 100 == 0:
                save_loc = '/data/codebases/video_classification/model{}.pth'.format(
                    ii)
                torch.save(model.state_dict(), save_loc)
        epoch_logger.log(
            1, {
                'epoch': epoch,
                'loss': losses.avg,
                'acc': accuracies.avg,
                'lr': optimizer.param_groups[0]['lr']
            })
        print('XXX Epoch: [{0}]\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(epoch,
                                                         i + 1,
                                                         opt.batch_size,
                                                         batch_time=batch_time,
                                                         data_time=data_time,
                                                         loss=losses,
                                                         acc=accuracies))
    exit(1)
Example #16
def extract_features(video_dir,
                     video_name,
                     class_names,
                     model,
                     opt,
                     annotation_digit=5):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    # print('reading file from: ', video_dir, 'file name: ', video_name)

    video_outputs = []
    video_segments = []
    model.eval()
    for i, (inputs, segments) in enumerate(data_loader):
        inputs = inputs.cuda()
        with torch.no_grad():  # Variable(..., volatile=True) is deprecated; disable autograd for inference
            outputs = model(inputs)
        # outputs_cpu = outputs.cpu().data.numpy()
        # video_outputs += outputs_cpu
        # video_outputs += outputs.cpu().data
        # np.vstack([video_outputs, outputs_cpu])
        video_outputs.append(outputs.cpu().data)
        # video_outputs.cat(video_outputs, outputs.cpu().data)
        video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}

    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }

        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
            clip_results['ground_truth_annotaion'] = annotation_digit

        results['clips'].append(clip_results)

    total_feature_vectors = len(results["clips"])
    np_data = np.array([], dtype=np.float64).reshape(0, 2048)
    for features_in_one_video in range(total_feature_vectors):
        # for i in result[1]["clips"]:
        # print (i["scores"])
        one_feature_vector = results["clips"][features_in_one_video][
            "features"]
        a = np.asarray(one_feature_vector)
        # print(a)
        np_data = np.vstack([np_data, a])

    return np_data
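A hypothetical usage sketch for extract_features above, saving the stacked feature matrix it returns; the frame directory, video name, class_names, model, and opt are placeholders.

import numpy as np

feats = extract_features('frames/video_0001', 'video_0001.mp4', class_names, model, opt)
np.save('video_0001_features.npy', feats)  # shape: (n_clips, 2048)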