Example #1
0
def test_video_classifier_finetune_from_csv(tmpdir):
    """Smoke-test fine-tuning a ``VideoClassifier`` on videos listed in a mock CSV file.

    The datamodule is built from the CSV produced by ``mock_video_csv_file``,
    every training sample is checked for the expected size along dimension 1
    of its ``"video"`` tensor, and a single ``fast_dev_run`` fine-tuning pass
    is executed with a ``slow_r50`` backbone.
    """
    # Size every "video" tensor must have along dimension 1.
    expected_num_frames = 5

    with mock_video_csv_file(tmpdir) as (csv_path, full_duration):
        # Clip length is just under half of the duration reported by the mock.
        clip_length = full_duration / 2 - 1e-9

        datamodule = VideoClassificationData.from_csv(
            "file",
            "target",
            train_file=csv_path,
            clip_sampler="uniform",
            clip_duration=clip_length,
            video_sampler=SequentialSampler,
            decode_audio=False,
            batch_size=1,
        )

        for item in datamodule.train_dataset.data:
            assert item["video"].shape[1] == expected_num_frames

        classifier = VideoClassifier(
            num_classes=datamodule.num_classes,
            pretrained=False,
            backbone="slow_r50",
        )
        # Use however many GPUs torch reports (possibly zero).
        runner = flash.Trainer(
            default_root_dir=tmpdir,
            fast_dev_run=True,
            gpus=torch.cuda.device_count(),
        )
        runner.finetune(classifier, datamodule=datamodule)
Example #2
0
def test_video_classifier_finetune_fiftyone(tmpdir):
    """Smoke-test fine-tuning a ``VideoClassifier`` on a FiftyOne video dataset.

    A mock encoded-video folder is loaded as a FiftyOne
    ``VideoClassificationDirectoryTree`` dataset, wrapped in a
    ``VideoClassificationData`` module, checked for the expected size along
    dimension 1 of each ``"video"`` tensor, and then used for a single
    ``fast_dev_run`` fine-tuning pass with a ``slow_r50`` backbone.
    """
    with mock_encoded_video_dataset_folder(tmpdir) as (dir_name, total_duration):
        # Clip length is just under half of the duration reported by the mock.
        half_duration = total_duration / 2 - 1e-9

        train_dataset = fo.Dataset.from_dir(
            dir_name,
            dataset_type=fo.types.VideoClassificationDirectoryTree,
        )
        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
            batch_size=1,
        )

        # The expectation does not depend on the sample, so define it once.
        expected_t_shape = 5
        for sample in datamodule.train_dataset.data:
            assert sample["video"].shape[1] == expected_t_shape

        model = VideoClassifier(
            num_classes=datamodule.num_classes,
            pretrained=False,
            backbone="slow_r50",
        )
        # Root the trainer in the pytest temp dir (as the CSV-based test does)
        # so any trainer output stays out of the current working directory.
        trainer = flash.Trainer(
            default_root_dir=tmpdir,
            fast_dev_run=True,
            gpus=torch.cuda.device_count(),
        )
        trainer.finetune(model, datamodule=datamodule)
Example #3
0
def from_kinetics(
    clip_sampler: str = "uniform",
    clip_duration: int = 1,
    decode_audio: bool = False,
    batch_size=1,
    **data_module_kwargs,
) -> VideoClassificationData:
    """Fetch the Kinetics archive and build a ``VideoClassificationData`` from it.

    ``download_data`` is called with the archive URL and ``./data``; the train
    and validation folders are then resolved against the current working
    directory.

    Args:
        clip_sampler: Forwarded to ``VideoClassificationData.from_folders``.
        clip_duration: Forwarded to ``VideoClassificationData.from_folders``.
        decode_audio: Forwarded to ``VideoClassificationData.from_folders``.
        batch_size: Forwarded to ``VideoClassificationData.from_folders``.
        **data_module_kwargs: Any extra keyword arguments, forwarded unchanged.

    Returns:
        The datamodule created by ``VideoClassificationData.from_folders``.
    """
    download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip", "./data")

    # Resolve the dataset folders only after the download call has returned.
    working_dir = os.getcwd()
    train_dir = os.path.join(working_dir, "data/kinetics/train")
    val_dir = os.path.join(working_dir, "data/kinetics/val")

    datamodule = VideoClassificationData.from_folders(
        train_folder=train_dir,
        val_folder=val_dir,
        clip_sampler=clip_sampler,
        clip_duration=clip_duration,
        decode_audio=decode_audio,
        batch_size=batch_size,
        **data_module_kwargs,
    )
    return datamodule
Example #4
0
def test_video_classifier_finetune_fiftyone(tmpdir):
    """Smoke-test fine-tuning a ``VideoClassifier`` on FiftyOne data with custom transforms.

    The test first builds a datamodule without transforms and checks the size
    of dimension 1 of every ``"video"`` tensor, then rebuilds the datamodule
    with a custom training transform and runs a single ``fast_dev_run``
    fine-tuning pass.
    """
    with mock_encoded_video_dataset_folder(tmpdir) as (dir_name, total_duration):
        # Clip length is just under half of the duration reported by the mock.
        half_duration = total_duration / 2 - 1e-9

        train_dataset = fo.Dataset.from_dir(
            dir_name,
            dataset_type=fo.types.VideoClassificationDirectoryTree,
        )
        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
        )

        # The expectation does not depend on the sample, so define it once.
        expected_t_shape = 5
        for sample in datamodule.train_dataset.data:
            assert sample["video"].shape[1] == expected_t_shape

        assert len(VideoClassifier.available_backbones()) > 5

        # Transforms applied to the "video" entry of each sample.
        post_tensor_transform = Compose([
            ApplyTransformToKey(
                key="video",
                transform=Compose([
                    UniformTemporalSubsample(8),
                    RandomShortSideScale(min_size=256, max_size=320),
                    RandomCrop(244),
                    RandomHorizontalFlip(p=0.5),
                ]),
            ),
        ])
        # Transforms applied to the "video" entry of each batch.
        per_batch_transform_on_device = Compose([
            ApplyTransformToKey(
                key="video",
                transform=K.VideoSequential(
                    K.Normalize(torch.tensor([0.45, 0.45, 0.45]), torch.tensor([0.225, 0.225, 0.225])),
                    K.augmentation.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                    data_format="BCTHW",
                    same_on_frame=False,
                ),
            ),
        ])
        train_transform = {
            "post_tensor_transform": post_tensor_transform,
            "per_batch_transform_on_device": per_batch_transform_on_device,
        }

        # Rebuild the datamodule, this time with the training transform attached.
        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
            train_transform=train_transform,
        )

        model = VideoClassifier(num_classes=datamodule.num_classes, pretrained=False)

        # Root the trainer in the pytest temp dir so any trainer output stays
        # out of the current working directory.
        trainer = flash.Trainer(default_root_dir=tmpdir, fast_dev_run=True)

        trainer.finetune(model, datamodule=datamodule)
                ApplyTransformToKey(key="video",
                                    transform=K.VideoSequential(
                                        *per_batch_transform_on_device,
                                        data_format="BCTHW",
                                        same_on_frame=False)),
            ]),
        }

    # 3. Load the data from directories.
    datamodule = VideoClassificationData.from_folders(
        train_folder=os.path.join(flash.PROJECT_ROOT, "data/kinetics/train"),
        val_folder=os.path.join(flash.PROJECT_ROOT, "data/kinetics/val"),
        predict_folder=os.path.join(flash.PROJECT_ROOT,
                                    "data/kinetics/predict"),
        train_transform=make_transform(train_post_tensor_transform),
        val_transform=make_transform(val_post_tensor_transform),
        predict_transform=make_transform(val_post_tensor_transform),
        batch_size=8,
        clip_sampler="uniform",
        clip_duration=1,
        video_sampler=RandomSampler,
        decode_audio=False,
        num_workers=8)

    # 4. List the available models
    print(VideoClassifier.available_backbones())
    # out: ['efficient_x3d_s', 'efficient_x3d_xs', ..., 'slowfast_r50', 'x3d_m', 'x3d_s', 'x3d_xs']
    print(VideoClassifier.get_backbone_details("x3d_xs"))

    # 5. Build the VideoClassifier with a PyTorchVideo backbone.
    model = VideoClassifier(backbone="x3d_xs",
                            num_classes=datamodule.num_classes,
import os

import torch

import flash
from flash.core.data.utils import download_data
from flash.video import VideoClassificationData, VideoClassifier

# 1. Create the DataModule
# More datasets are listed at https://pytorchvideo.readthedocs.io/en/latest/data.html
download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip", "./data")

# The train / validation folders are resolved against the current working directory.
train_dir = os.path.join(os.getcwd(), "data/kinetics/train")
val_dir = os.path.join(os.getcwd(), "data/kinetics/val")
datamodule = VideoClassificationData.from_folders(
    train_folder=train_dir,
    val_folder=val_dir,
    clip_sampler="uniform",
    clip_duration=1,
    decode_audio=False,
)

# 2. Build the task: an `x3d_xs` backbone with `pretrained=False`
model = VideoClassifier(
    backbone="x3d_xs",
    num_classes=datamodule.num_classes,
    pretrained=False,
)

# 3. Create the trainer (using every GPU torch reports) and finetune the model
gpu_count = torch.cuda.device_count()
trainer = flash.Trainer(max_epochs=3, gpus=gpu_count)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 4. Make a prediction on the `predict` folder
predict_dir = os.path.join(os.getcwd(), "data/kinetics/predict")
predictions = model.predict(predict_dir)
import flash
from flash.core.data.utils import download_data
from flash.core.integrations.labelstudio.visualizer import launch_app
from flash.video import VideoClassificationData, VideoClassifier

# 1. Download data
video_data_url = "https://label-studio-testdata.s3.us-east-2.amazonaws.com/lightning-flash/video_data.zip"
download_data(video_data_url)

# 2. Load the Label Studio export, passing `val_split=0.2`
datamodule = VideoClassificationData.from_labelstudio(
    export_json="data/project.json",
    data_folder="data/upload/",
    val_split=0.2,
    clip_sampler="uniform",
    clip_duration=1,
    decode_audio=False,
)

# 3. Build the task with a `slow_r50` backbone
model = VideoClassifier(backbone="slow_r50", num_classes=datamodule.num_classes)

# 4. Create the trainer and finetune the model using the "freeze" strategy
trainer = flash.Trainer(max_epochs=3)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 5. Make a prediction
# See the License for the specific language governing permissions and
# limitations under the License.
import torch

import flash
from flash.core.data.utils import download_data
from flash.video import VideoClassificationData, VideoClassifier

# 1. Create the DataModule
# More datasets are listed at https://pytorchvideo.readthedocs.io/en/latest/data.html
download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip", "./data")

datamodule = VideoClassificationData.from_folders(
    train_folder="data/kinetics/train",
    val_folder="data/kinetics/val",
    clip_sampler="uniform",
    clip_duration=1,
    decode_audio=False,
    batch_size=1,
)

# 2. Build the task: an `x3d_xs` backbone with `pretrained=False`
model = VideoClassifier(
    backbone="x3d_xs",
    labels=datamodule.labels,
    pretrained=False,
)

# 3. Create the trainer and finetune the model
# "ddp" is requested only when more than one GPU is reported.
gpu_count = torch.cuda.device_count()
trainer_strategy = "ddp" if gpu_count > 1 else None
trainer = flash.Trainer(max_epochs=1, gpus=gpu_count, strategy=trainer_strategy)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 4. Make a prediction: rebuild the datamodule from the `predict` folder
datamodule = VideoClassificationData.from_folders(
    predict_folder="data/kinetics/predict",
    batch_size=1,
)
Example #9
0
                ApplyTransformToKey(key="video",
                                    transform=K.VideoSequential(
                                        *per_batch_transform_on_device,
                                        data_format="BCTHW",
                                        same_on_frame=False)),
            ]),
        }

    # 3. Load the data from directories.
    datamodule = VideoClassificationData.from_paths(
        train_data_path=os.path.join(_PATH_ROOT, "data/kinetics/train"),
        val_data_path=os.path.join(_PATH_ROOT, "data/kinetics/val"),
        predict_data_path=os.path.join(_PATH_ROOT, "data/kinetics/predict"),
        clip_sampler="uniform",
        clip_duration=2,
        video_sampler=RandomSampler,
        decode_audio=False,
        train_transform=make_transform(train_post_tensor_transform),
        val_transform=make_transform(val_post_tensor_transform),
        predict_transform=make_transform(val_post_tensor_transform),
        num_workers=8,
        batch_size=8,
    )

    # 4. List the available models
    print(VideoClassifier.available_models())
    # out: ['efficient_x3d_s', 'efficient_x3d_xs', ..., 'slowfast_r50', 'x3d_m', 'x3d_s', 'x3d_xs']
    print(VideoClassifier.get_model_details("x3d_xs"))

    # 5. Build the model - `x3d_xs` comes with `nn.Softmax` by default for their `head_activation`.
    model = VideoClassifier(model="x3d_xs", num_classes=datamodule.num_classes)
    model.serializer = Labels()