Example #1
    def prepare(self, labels):
        FPS = 30  # nominal frame rate; a 12-frame GOP at 30 fps spans 0.4 s
        video_path, gop_index, targets, ids = [], [], [], []

        for vid, label in labels.items():
            vpath = os.path.join(self._data_root, vid + '.mp4')
            num_frames = get_num_frames(vpath)
            num_gop = num_frames // GOP_SIZE

            # Alternative (disabled): build a 157-way one-hot, multi-label
            # target per 5-GOP window instead of one class index per GOP.

            # one target per GOP: keep each GOP whose time window lies
            # inside the action's [start, end] interval
            for x in label:
                for gop in range(num_gop):
                    if (x['start'] < gop*5*0.4) and ((gop*5+1)*0.4 < x['end']):
                        video_path.append(vpath)
                        gop_index.append(gop)
                        targets.append(cls2int(x['class']))
                        ids.append(vid)

            # Alternative (disabled): per-GOP class targets for training,
            # one-hot multi-label video-level targets for evaluation.

        return {'video_path': video_path, 'gop_index': gop_index, 'targets': targets, 'ids': ids}

    def _load_video(self, video_name):
        # choose which representation to extract
        representation_idx = 0
        if self._representation == 'mv':
            representation_idx = 1
        elif self._representation == 'residual':
            representation_idx = 2

        # count the number of segments
        total_frames = get_num_frames(video_name)
        total_segments = total_frames // SEG_SIZE

        # record the middle frame of each segment
        frames = []
        for i in range(total_segments):
            gop_idx, gop_pos = self._get_frame_index(total_frames, i)
            img = load(video_name, gop_idx, gop_pos, representation_idx, self._accumulate)
            roi_img = img[int(ROI_Y):int(ROI_Y+ROI_HEIGHT), int(ROI_X):int(ROI_X+ROI_WIDTH)]
            frames.append(roi_img)

        # by default, one action is recognized from every 3 consecutive segments
        for i in range(2, len(frames)):
            self._frames.append([frames[i - 2], frames[i - 1], frames[i]])
        frames.clear()
Example #3
    def _load_list(self, video_list):
        self._video_list = []
        with open(video_list, 'r') as f:
            for line in f:
                video, _, label = line.strip().split()
                video_path = os.path.join(self._data_root, video[:-4] + '.mp4')
                self._video_list.append(
                    (video_path, int(label), get_num_frames(video_path)))

        print('%d videos loaded.' % len(self._video_list))

    def _load_list(self, video_list):
        self._video_list = []
        if self._dataset == 'ucf101':  # for ucf
            with open(video_list, 'r') as f:
                for line in f:
                    video, _, label = line.strip().split(' ')
                    video_path = os.path.join(self._data_root,
                                              video[:-4] + '.mp4')
                    self._video_list.append(
                        (video_path, int(label), get_num_frames(video_path)))
        elif self._dataset == 'kinetics400':  # for kinetics
            with open(video_list, 'r') as f:
                for line in f:
                    video, label = line.strip().split(',')
                    video_path = os.path.join(self._data_root, video)
                    self._video_list.append(
                        (video_path, int(label), get_num_frames(video_path)))

        print('%d videos loaded.' % len(self._video_list))
Example #5
    def _load_list(self, video_list):
        self._video_list = []
        with open(video_list, 'r') as f:
            for line in f:
                video, _, label = line.strip().split()
                video_path = os.path.join(self._data_root, video[:-4] + '.mp4')
                self._video_list.append((
                    video_path,
                    int(label),
                    get_num_frames(video_path)))

        print('%d videos loaded.' % len(self._video_list))

def _parse_function_v2(filename, label, nSegments):

    # filename arrives as bytes (e.g. from a tf.data py-function), hence .decode()
    nFrames = get_num_frames(filename.decode())
    reps_np = []

    # representation_idx: 0 = I-frame (RGB), 1 = motion vectors, 2 = residuals
    for representation_idx in range(0, 3):

        frames = []
        for seg_idx in range(0, nSegments):
            gop_index, gop_pos = getTrainFrameIndex(nFrames, seg_idx,
                                                    nSegments,
                                                    representation_idx)
            img = load(filename.decode(), gop_index, gop_pos,
                       representation_idx, True)
            if img is None:
                # decoding failed; substitute a black frame
                img = np.zeros((256, 256, 3))
            else:
                if representation_idx == 1:
                    # rescale motion vectors (roughly +/-20) into [0, 255]
                    # and pad a zero third channel to get a 3-channel image
                    img = (img * (127.5 / 20)).astype(np.int32)
                    img += 128
                    img = (np.minimum(np.maximum(img, 0),
                                      255)).astype(np.uint8)
                    img = np.append(img,
                                    np.zeros_like(img[..., 0, None]),
                                    axis=-1)
                elif representation_idx == 2:
                    # shift residuals by +128 and clip into [0, 255]
                    img += 128
                    img = (np.minimum(np.maximum(img, 0),
                                      255)).astype(np.uint8)
                else:
                    img = img[..., ::-1]  # BGR -> RGB
            frames.append(img)

        np_frames = np.array(frames).astype(np.float32) / 255.0

        if representation_idx == 0:
            np_frames = (np_frames - DATA_MEAN) / DATA_STD
        elif representation_idx == 2:
            np_frames = (np_frames - 0.5) / DATA_STD
        elif representation_idx == 1:
            np_frames = (np_frames - 0.5)
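        # crop every frame to a 224 x 224 window (assumes sufficiently large frames)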
        np_frames = np_frames[:, 16:240, 52:276, :].astype(np.float32)
        reps_np.append(np_frames)

    return reps_np[0], reps_np[1], reps_np[2], label
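
# A sketch of how _parse_function_v2 might be wired into a tf.data input
# pipeline via tf.numpy_function; the dtypes, batch size, and the helper name
# make_dataset below are assumptions, not taken from the source:
import numpy as np
import tensorflow as tf

def make_dataset(filenames, labels, n_segments):
    labels = np.asarray(labels, dtype=np.int64)
    ds = tf.data.Dataset.from_tensor_slices((filenames, labels))

    def _map(filename, label):
        iframe, mv, res, label = tf.numpy_function(
            _parse_function_v2, [filename, label, n_segments],
            [tf.float32, tf.float32, tf.float32, tf.int64])
        return iframe, mv, res, label

    return ds.map(_map, num_parallel_calls=tf.data.AUTOTUNE).batch(8)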
Example #7
    def _load_list(self, video_list):
        self._video_list = []
        with open(video_list, 'r') as f:
            for line in f:
                # e.g. video: WritingOnBoard/v_WritingOnBoard_g25_c07.avi
                #      _:     WritingOnBoard (the class folder name)
                #      label: 99
                video, _, label = line.strip().split()
                video_path = os.path.join(self._data_root, video[:-4] + '.mp4')
                self._video_list.append(
                    (video_path, int(label),
                     get_num_frames(video_path)))  # frame count via the coviar extension
        print('{} videos loaded.'.format(len(self._video_list)))
Example #8
def main():
    for video_name in video_names:
        fold_path = video_name.split('.avi')[0].split('/')[-1]
        path_mv = os.path.join(fold_path, PATH_MV_CONT)
        path_res = os.path.join(fold_path, PATH_RES_CONT)
        if not os.path.exists(path_mv):
            os.makedirs(path_mv)
        if not os.path.exists(path_res):
            os.makedirs(path_res)
        num_frames = get_num_frames(video_name)
        print(num_frames)
        # iterate over every GOP and every frame inside it
        for curGopIdx in range(max(num_frames // GOP_FRAMES_NUM, 1)):
            for innerGopIdx in range(GOP_FRAMES_NUM):
                curFrameIdx = curGopIdx * GOP_FRAMES_NUM + innerGopIdx
                print(video_name, curGopIdx, innerGopIdx)
                mvCont_origin = load(video_name, curGopIdx, innerGopIdx, 1, False)
                resCont = load(video_name, curGopIdx, innerGopIdx, 2, False)

                if mvCont_origin is None:
                    mvCont_origin = np.zeros([720, 960, 2], dtype=np.int16)

                # shift into the non-negative range, then pack each 16-bit
                # component into two 8-bit PNG channels:
                # (high_h, low_h, high_w, low_w)
                mvCont = mvCont_origin + 2048
                mvPng = np.array([(mvCont[:, :, 0] >> 8) & 0xff,
                                  mvCont[:, :, 0] & 0xff,
                                  (mvCont[:, :, 1] >> 8) & 0xff,
                                  mvCont[:, :, 1] & 0xff], dtype=np.uint8)
                mvPng = np.transpose(mvPng, [1, 2, 0])


                imsave(path_mv + '/frame' + str(curFrameIdx) + '.png', mvPng)
                if resCont is None:
                    resCont = np.zeros([720, 960, 3], dtype=np.uint8)

                # map signed residuals into the [0, 255] range
                resCont = np.round((resCont + 256) / 2).astype(np.uint8)
                imsave(path_res + '/frame' + str(curFrameIdx) + '.png', resCont)
                cv2.imwrite(os.path.join(PATH_RES_CONT, fold_path + '.png'), resCont)
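
# A sketch of the inverse transform: decode one of the saved motion-vector
# PNGs back to signed values (assumes `imread` comes from the same image
# library as the `imsave` used above; `decode_mv_png` is not in the source):
def decode_mv_png(png_path):
    png = imread(png_path).astype(np.int32)
    # channels are (high_h, low_h, high_w, low_w); undo the +2048 offset
    mv = np.stack([(png[:, :, 0] << 8) + png[:, :, 1],
                   (png[:, :, 2] << 8) + png[:, :, 3]], axis=-1)
    return mv - 2048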
Example #9
    def _load_list(self, video_list):
        # video_list: e.g. ucf101_split1_train.txt
        self._video_list = []
        with open(video_list, 'r') as f:
            for line in f:
                # ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01.avi ApplyEyeMakeup 0
                # video = *.avi
                # label = 0
                video, _, label = line.strip().split()
                video_path = os.path.join(self._data_root, video[:-4] + '.mp4')
                self._video_list.append(
                    (video_path, int(label), get_num_frames(video_path)))
                # get_num_frames comes from the coviar C extension:
                # "Getting number of frames in a video."
                # _video_list entries: (path to .mp4, label, number of frames)

        print('%d videos loaded.' % len(self._video_list))
Example #10
def load_list(video_list):
    # video_list: e.g. ucf101_split1_train.txt
    _video_list = []
    with open(video_list, 'r') as f:
        for line in f:
            # ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01.avi ApplyEyeMakeup 0
            # video = *.avi
            # label = 0
            video, folder, label = line.strip().split()
            video_path = os.path.join(data_root, video[:-4] + '.mp4')
            video_name = video[video.rfind('/') + 1:-4]
            num_frames = get_num_frames(video_path)
            _video_list.append(
                (video_path, folder, video_name, int(label), num_frames))
            # _video_list entries: (path to .mp4, folder name
            # (e.g. ApplyEyeMakeup), video name, label, number of frames)
    with open('video_list.p', 'wb') as list_p:
        pickle.dump(_video_list, list_p)
Example #11
def _perturbation_image(model, original_image, ori_label, video_path,
                        save_path, transform_post, args, config, device):

    original_image = original_image.to(device)

    total_frames = get_num_frames(video_path)
    original_image_ = original_image.clone()  # keep a pristine copy
    num_frame, channel, height, width = original_image.shape
    dim = height * width * channel
    loop = 0
    inner_loop = 0
    success = False
    num_query = 0
    num_pframe = 0

    max_query = 60000
    exploration = 0.1
    fd_eta = 0.1
    online_lr = 0.1
    flow_lr = 0.025
    target_label = (ori_label + 1) % args.num_classes
    motion_vector = list()

    prior = torch.zeros(num_frame, channel, height, width).to(device)
    delta = torch.zeros(num_frame, channel, height, width).to(device)
    est_grad = torch.zeros(num_frame, channel, height, width).to(device)
    adv_img = torch.zeros(3, num_frame, channel, height, width).to(device)
    iframe = torch.zeros(num_frame, height, width, channel).to(device)
    noise_frames = torch.zeros(num_frame, channel, height, width).to(device)

    index_visual = torch.zeros(num_frame, 2, height, width).to(device)
    index_motion = torch.zeros(num_frame, height, width, 2).to(device)

    while num_query <= max_query:
        pred_adv_logit = list()
        start1 = time.time()

        end_index = total_frames // GOP_SIZE
        if loop % args.interval == 0:  # can also try 8 for tsn2d
            # refresh the motion vectors from the next GOP
            mv_index = inner_loop % end_index
            mv = load(video_path, mv_index, 11, 1, True)

            mv = mv - mv.min()
            mv = np.dstack((mv, np.zeros((mv.shape[:2] + (1, )))))
            mv = [mv.astype(np.uint8)] * num_frame
            inner_loop += 1
            motion_vector = transform_post(mv)
            motion_vector = np.stack(motion_vector, axis=0) * 255
            motion_vector = torch.from_numpy(motion_vector).permute(
                0, 2, 3, 1).float().to(device)

            motion_vector[:, :, :, 0] = (2 * motion_vector[:, :, :, 0] -
                                         height + 1.) / (height - 1.)
            motion_vector[:, :, :, 1] = (2 * motion_vector[:, :, :, 1] -
                                         width + 1.) / (width - 1.)

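        # warp one shared noise image along the motion vectors so the
        # perturbation stays temporally consistent across frames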
        noise_frames = torch.randn(1, 3, height,
                                   width).repeat(num_frame, 1, 1, 1).to(device)
        noise_frames = F.grid_sample(noise_frames, motion_vector[:, :, :, :2])

        exp_noise = exploration * noise_frames
        q1 = prior + exp_noise
        q2 = prior - exp_noise
        adv_img[0] = original_image + fd_eta * q1 / norm2(q1)
        adv_img[1] = original_image + fd_eta * q2 / norm2(q2)
        adv_img[2] = original_image
        for i in range(3):
            img_group = normalization(adv_img[i].clone().cpu().numpy(), args)
            tmp_result = model(img_group.astype('float32', copy=False))
            tmp_result = FF.mean(tmp_result, axis=0, keepdims=True)
            tmp_result = torch.from_numpy(tmp_result.asnumpy()).to(device)
            pred_adv_logit.append(tmp_result)

        l1, _, _, _, _, _, _ = _pert_loss(pred_adv_logit[0], ori_label,
                                          target_label, delta)
        l2, _, _, _, _, _, _ = _pert_loss(pred_adv_logit[1], ori_label,
                                          target_label, delta)
        loss, target, real, other, other_class, second_logit, second_class = _pert_loss(
            pred_adv_logit[2], ori_label, target_label, delta)

        num_query += 3
        est_deriv = (l1 - l2) / (fd_eta * exploration * exploration)
        est_grad = est_deriv.item() * exp_noise
        prior += online_lr * est_grad

        original_image = original_image - flow_lr * prior.sign()
        delta = original_image_ - original_image
        original_image = torch.max(
            torch.min(original_image, original_image_ + 0.03),
            original_image_ - 0.03)
        original_image = torch.clamp(original_image, 0, 1)

        pred_adv_label = pred_adv_logit[2].argmax()
        if (loop % 1000 == 0) or (loop == max_query) or pred_adv_label != ori_label:
            print('[T2]{:.3f}s for [{}]-th loop\t'
                  'Queries {:03d}\t'
                  'Overall loss {:.3f}\t'
                  'est_deriv {:.3f}\t'
                  'Target {}\t'
                  'Target logit {:.3f}\t'
                  'ori logit {:.3f}\t'
                  'ori class {}\t'
                  'second logit {:.3f}\t'
                  'second class {}\t'.format(time.time() - start1,
                                             loop, num_query, loss,
                                             est_deriv.item(), target, real,
                                             other, other_class, second_logit,
                                             second_class))

        loop += 1
        if pred_adv_label != ori_label:
            diff = adv_img[2] - original_image_
            print('diff max {:.3f}, diff min {:.3f}'.format(
                diff.max(), diff.min()))
            success = True
            break

        if num_query >= max_query:
            break
    return pred_adv_label, num_query, success
Example #12
def make_dataset(opt, root_path, annotation_path, subset,
                 n_samples_for_each_video, frames_sequence):
    data = load_annotation_data(annotation_path)
    video_names, annotations = get_video_names_and_annotations(data, subset)
    class_to_idx = get_class_labels(data)
    idx_to_class = {}
    for name, label in class_to_idx.items():
        idx_to_class[label] = name

    dataset = []
    total_number_of_clips = 0
    gflop_per_clip = 8.5

    for i in range(len(video_names)):
        if i % 1000 == 0:
            print('dataset loading [{}/{}]'.format(i, len(video_names)))

        if ' ' in video_names[i]:
            video_names[i] = video_names[i].replace(' ', '_', 1)

        if subset == "training":
            video_path = os.path.join(opt.video_path, video_names[i]) + ".mp4"
            if not os.path.exists(video_path):
                continue

        elif subset == "validation":
            #root_path="/kinetics2/kinetics2/kinetics_val_reencode"
            video_path = os.path.join(opt.video_path, video_names[i]) + ".mp4"

            if not os.path.exists(video_path):

                continue

        n_frames = get_num_frames(video_path)
        if n_frames <= 0:
            continue

        sample = {
            'video': video_path,
            'n_frames': n_frames,
            'video_id': video_names[i]
        }
        if len(annotations) != 0:
            sample['label'] = class_to_idx[annotations[i]['label']]
        else:
            sample['label'] = -1

        if n_samples_for_each_video == 1:
            sample['frame_indices'] = list(range(1, n_frames + 1))
            dataset.append(sample)
        else:
            if n_samples_for_each_video > 1:
                step = max(
                    1,
                    math.ceil((n_frames - 1 - frames_sequence) /
                              (n_samples_for_each_video - 1)))
            else:
                # stride of half a clip length for both training and validation
                step = int(frames_sequence / 2)
            for j in range(1, n_frames, step):
                sample_j = copy.deepcopy(sample)
                sample_j['frame_indices'] = list(
                    range(j, min(n_frames + 1, j + frames_sequence)))

                total_number_of_clips += 1
                dataset.append(sample_j)
    if n_samples_for_each_video == 0:
        num_of_videos = len(video_names)
        avg_clips_per_video = round(total_number_of_clips / num_of_videos, 2)
        print("Number of videos:", num_of_videos)
        print("Number of clips:", total_number_of_clips)
        print("Average number of clips per video:", avg_clips_per_video)
        print("Average GFLOPs per video (accurate for MFNet only):",
              gflop_per_clip * avg_clips_per_video)
    return dataset, idx_to_class
Example #13
def show_boxes_in_compressed_video(video_path,
                                   update_ms=10,
                                   min_confidence=0.0,
                                   box_file_path=None,
                                   min_frame_idx=None,
                                   max_frame_idx=None,
                                   frame_interval=1,
                                   frame_type=0,
                                   accumulate=False):
    """
    This function show box

    :param box_file_path: string, the path of the boxes. The format of this file should be the same with MOTChallenge
                det.txt or gt.txt.
    :param video_path: string, the path of frames.
    :param update_ms: scalar, 1000 / update_ms is the fps, default 10
    :param min_confidence: float, the confidence threshold of detection, the boxes with smaller confidence will not be
                displayed. Default 0.0
    :param min_frame_idx: integer, the first frame to display, default the first frame of this sequence
    :param max_frame_idx: integer, the last frame to display, default the last frame of this sequence
    :param frame_interval: the interval to show frames
    :param frame_type: int, can be 0, 1 or 2 (denotes I frame, motion vector, residual, respectively)
    :param accumulate: used for the motion vector and residual. If it is true, the motion vector and residual are
                accumulated.
    :return: None
    """
    def frame_callback(vis, frame_idx):
        print('Processing frame: ', frame_idx)

        # Load image and generate detections.
        # Update visualization.
        # the number of frames in one group; 12 for the raw mpeg4 video
        GROUP_SIZE = 12
        # GOP index starts from 0, while frame_idx starts from 1
        gop_idx = int((frame_idx - 1) / GROUP_SIZE)
        in_group_idx = int((frame_idx - 1) % GROUP_SIZE)  # the index in the group
        image = coviar.load(video_path, gop_idx, in_group_idx, frame_type,
                            accumulate)
        image = compressed_frame_to_show(image, frame_type, tool_type='cv2')

        vis.set_image(image.copy(), frame_idx)

        raw_box = seq_info['boxes']
        if raw_box is not None:
            index = raw_box[:, 0] == frame_idx
            box = raw_box[index]
            index = box[:, 6] >= min_confidence
            box = box[index]
            box = box[:, 1:7]  # [target_id, x, y, w, h, confidence]
            box_list = []
            for idx in range(box.shape[0]):
                box_list.append(box[idx, :])
            vis.draw_box(box_list)

    total_frames = coviar.get_num_frames(video_path) + 1

    # get the first and last frame index
    if min_frame_idx is None:
        min_frame_idx = 1
    if min_frame_idx < 0 or min_frame_idx > total_frames:
        min_frame_idx = 1

    if max_frame_idx is None:
        max_frame_idx = total_frames
    if max_frame_idx < 0 or max_frame_idx > total_frames:
        max_frame_idx = total_frames

    if min_frame_idx > max_frame_idx:
        raise RuntimeError(
            'The first frame index %d is larger than the last frame index %d'
            % (min_frame_idx, max_frame_idx))

    # get the sequence information
    im = coviar.load(video_path, 0, 0, 0, False)
    im_size = im.shape

    raw_boxes = None if box_file_path is None else np.loadtxt(
        box_file_path, dtype=float, delimiter=',')

    seq_info = {
        'image_size': [im_size[0], im_size[1]],
        'min_frame_idx': min_frame_idx,
        'max_frame_idx': max_frame_idx,
        'frame_interval': frame_interval,
        'boxes': raw_boxes,
        'sequence_name': ''
    }

    visualizer = Visualization(seq_info, update_ms)
    visualizer.run(frame_callback)
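
# A minimal usage sketch; the video and box-file paths below are placeholders,
# not taken from the source:
if __name__ == '__main__':
    show_boxes_in_compressed_video(
        'MOT16-02.mp4',            # a raw-mpeg4 video with 12-frame GOPs
        update_ms=40,              # 1000 / 40 = 25 fps playback
        min_confidence=0.3,
        box_file_path='det.txt',   # MOTChallenge-format det.txt / gt.txt
        frame_type=1,              # 0: I-frame, 1: motion vector, 2: residual
        accumulate=True)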