Example 1
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path = data['img_path']
        skeleton_path = data['skeleton_path']
        original_shape = data['original_shape']
        action_label = data['action_label']
        frame_num = data['frame_num']
        start_frame_idx = data['start_frame_idx']

        # load the video frames and the frame indices used to look up skeletons
        video, skeleton_frame_idxs = load_video(img_path, frame_num,
                                                start_frame_idx)
        resized_shape = video.shape[1:3]

        # augmentation
        video, img2aug_trans, aug2img_trans, do_flip = augmentation(
            video, self.data_split)
        video = video.transpose(0, 3, 1, 2).astype(
            np.float32) / 255.  # (frame_num, channel, height, width), scaled to [0, 1]

        # load skeleton information for the sampled frames
        pose_coords, pose_scores = self.load_skeleton(skeleton_path,
                                                      skeleton_frame_idxs,
                                                      original_shape,
                                                      resized_shape)

        # process skeleton information
        pose_coords, pose_scores = process_skeleton(pose_coords, pose_scores,
                                                    img2aug_trans, do_flip,
                                                    self.flip_pairs,
                                                    self.joint_num,
                                                    resized_shape)
        """
        # for debug
        # keypoint visualization
        for i in range(cfg.frame_per_seg):
            img = video[i,::-1,:,:].transpose(1,2,0) * 255
            person_num = len(pose_coords[i])
            for p in range(person_num):
                #for j in range(self.joint_num):
                    #coord = (int(pose_coords[i][p][j][0]), int(pose_coords[i][p][j][1]))
                    #cv2.circle(img, coord, radius=3, color=(255,0,0), thickness=-1, lineType=cv2.LINE_AA)
                    #cv2.imwrite(str(idx) + '_' + str(action_label) + '_' + str(i) + '_' + str(j) + '.jpg', img)
                coord = pose_coords[i][p].copy()
                coord[:,0] = coord[:,0] / cfg.input_hm_shape[1] * cfg.input_img_shape[1]
                coord[:,1] = coord[:,1] / cfg.input_hm_shape[0] * cfg.input_img_shape[0]
                img = vis_keypoints(img, pose_coords[i][p] * 4, self.skeleton)
            cv2.imwrite(str(idx) + '_' + str(action_label) + '_' + str(i) + '.jpg', img)
        """

        inputs = {
            'video': video,
            'pose_coords': pose_coords,
            'pose_scores': pose_scores
        }
        targets = {'action_label': action_label}
        meta_info = {'img_id': data['img_id']}
        return inputs, targets, meta_info
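
A rough usage sketch (the class name ActionDataset and its constructor arguments are assumptions; in this pattern the method above belongs to a torch.utils.data.Dataset subclass), showing how one item could be fetched and inspected:

dataset = ActionDataset(data_split='train')   # hypothetical class name / arguments
inputs, targets, meta_info = dataset[0]

video = inputs['video']   # float32, (frame_num, 3, height, width), scaled to [0, 1]
print(video.shape, video.dtype)
print(targets['action_label'], meta_info['img_id'])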
Example 2
    if len(losses) > 50:
        # Early-stopping check: assume convergence unless some consecutive pair
        # among the last ~50 recorded losses still shows a relative drop larger
        # than loss_tolerance.
        done = True
        for i in range(-51, -2):
            if (losses[i] - losses[i + 1]) > loss_tolerance * losses[i]:
                done = False

    losses.append(loss)

    return image, done
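
# Illustrative sketch, not part of the original script: the early-stopping rule
# above, wrapped as a standalone helper so it can be read and tested in
# isolation (the tolerance default is an assumed value for demonstration).
def has_converged(losses, loss_tolerance=1e-3):
    # Require more than 50 recorded losses, then stop only if no consecutive
    # pair in the last ~50 entries still improves by more than the relative
    # tolerance (mirrors the check in the function above).
    if len(losses) <= 50:
        return False
    for i in range(-51, -2):
        if (losses[i] - losses[i + 1]) > loss_tolerance * losses[i]:
            return False
    return True

# has_converged([1.0] * 60)                          -> True  (flat losses)
# has_converged([1.0 / (k + 1) for k in range(60)])  -> False (still dropping)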


if __name__ == "__main__":
    style_path = "../images/style.jpg"
    style_image, _ = load_img(style_path)

    images, yuvs = load_video("../videos/cat2.mp4", frame_interval)

    styled_images = []
    losses = []
    start = time.time()

    # VGG19 layer used for the content representation
    content_layers = ["block5_conv2"]

    # VGG19 layers used for the style representation
    style_layers = [
        "block1_conv1",
        "block2_conv1",
        "block3_conv1",
        "block4_conv1",
        "block5_conv1",
    ]
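
For context, a minimal sketch of how layer lists like these are typically turned into a feature extractor for style transfer (assuming the script uses tf.keras; the helper name build_vgg_extractor is hypothetical):

import tensorflow as tf

def build_vgg_extractor(layer_names):
    # Frozen VGG19 whose outputs are the activations of the requested layers,
    # as in the standard Gatys-style setup.
    vgg = tf.keras.applications.VGG19(include_top=False, weights="imagenet")
    vgg.trainable = False
    outputs = [vgg.get_layer(name).output for name in layer_names]
    return tf.keras.Model(inputs=vgg.input, outputs=outputs)

extractor = build_vgg_extractor(style_layers + content_layers)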