Example #1
0
def from_urban8k(
    batch_size: int = 4,
    **data_module_kwargs,
) -> AudioClassificationData:
    """Fetch the Urban 8k sound images and build a data module from them.

    Args:
        batch_size: Batch size forwarded to
            ``AudioClassificationData.from_folders``.
        **data_module_kwargs: Extra keyword arguments forwarded unchanged to
            ``AudioClassificationData.from_folders``.

    Returns:
        Whatever ``AudioClassificationData.from_folders`` builds from the
        ``train`` and ``val`` folders of the downloaded data set.
    """
    archive_url = "https://pl-flash-data.s3.amazonaws.com/urban8k_images.zip"
    download_data(archive_url, "./data")

    # These paths point inside the "./data" download target used above.
    train_dir = "data/urban8k_images/train"
    val_dir = "data/urban8k_images/val"

    return AudioClassificationData.from_folders(
        train_folder=train_dir,
        val_folder=val_dir,
        batch_size=batch_size,
        **data_module_kwargs,
    )
Example #2
0
def test_from_folders_only_train(tmpdir):
    """Check ``from_folders`` given only a train folder.

    Builds ``train/{a,b}/{1,2}.png`` from ``_rand_image()`` outputs, loads the
    folder with ``batch_size=1`` and verifies the shapes of the first train
    batch.
    """
    seed_everything(42)

    train_dir = Path(tmpdir / "train")
    train_dir.mkdir()

    # Two class sub-folders, two images each, created in a fixed order so the
    # sequence of `_rand_image()` calls after seeding stays the same.
    for class_name in ("a", "b"):
        class_dir = train_dir / class_name
        class_dir.mkdir()
        for file_name in ("1.png", "2.png"):
            _rand_image().save(class_dir / file_name)

    datamodule = AudioClassificationData.from_folders(train_dir, batch_size=1)

    first_batch = next(iter(datamodule.train_dataloader()))
    inputs, targets = first_batch["input"], first_batch["target"]
    assert inputs.shape == (1, 3, 128, 128)
    assert targets.shape == (1, )
Example #3
0
def test_from_folders_train_val(tmpdir):
    """Check ``from_folders`` with train, val and test folders.

    The same ``train/{a,b}/{1,2}.png`` folder is used for all three splits.
    The first batch of each dataloader is checked for its input shape, target
    shape and label values.
    """
    seed_everything(42)

    image_root = Path(tmpdir / "train")
    image_root.mkdir()

    # Two class sub-folders, two images each, created in a fixed order so the
    # sequence of `_rand_image()` calls after seeding stays the same.
    for class_name in ("a", "b"):
        class_dir = image_root / class_name
        class_dir.mkdir()
        for file_name in ("1.png", "2.png"):
            _rand_image().save(class_dir / file_name)

    datamodule = AudioClassificationData.from_folders(
        image_root,
        val_folder=image_root,
        test_folder=image_root,
        batch_size=2,
        num_workers=0,
    )

    def check_first_batch(dataloader, expected_labels):
        # Pull one batch and verify its shapes and label values.
        batch = next(iter(dataloader))
        inputs, targets = batch["input"], batch["target"]
        assert inputs.shape == (2, 3, 128, 128)
        assert targets.shape == (2, )
        assert list(targets.numpy()) == expected_labels

    # Each dataloader is created and consumed before the next one is built.
    check_first_batch(datamodule.train_dataloader(), [0, 1])
    check_first_batch(datamodule.val_dataloader(), [0, 0])
    check_first_batch(datamodule.test_dataloader(), [0, 0])
Example #4
0
# See the License for the specific language governing permissions and
# limitations under the License.
import torch

import flash
from flash.audio import AudioClassificationData
from flash.core.data.utils import download_data
from flash.core.finetuning import FreezeUnfreeze
from flash.image import ImageClassifier

# 1. Create the DataModule: download the Urban 8k image archive into ./data,
#    then load the train/val folders with 64x64 spectrograms.
download_data(
    "https://pl-flash-data.s3.amazonaws.com/urban8k_images.zip",
    "./data",
)

datamodule = AudioClassificationData.from_folders(
    train_folder="data/urban8k_images/train",
    val_folder="data/urban8k_images/val",
    spectrogram_size=(64, 64),
)

# 2. Build the model, sized to the datamodule's `num_classes`.
model = ImageClassifier(
    backbone="resnet18",
    num_classes=datamodule.num_classes,
)

# 3. Create the trainer (using every GPU torch reports) and finetune the model
gpu_count = torch.cuda.device_count()
trainer = flash.Trainer(max_epochs=3, gpus=gpu_count)

finetune_strategy = FreezeUnfreeze(unfreeze_epoch=1)
trainer.finetune(model, datamodule=datamodule, strategy=finetune_strategy)

# 4. Predict what's in a few images! air_conditioner, children_playing, siren, etc.
predictions = model.predict(
    [
        "data/urban8k_images/test/air_conditioner/13230-0-0-5.wav.jpg",
        "data/urban8k_images/test/children_playing/9223-2-0-15.wav.jpg",
# See the License for the specific language governing permissions and
# limitations under the License.
import torch

import flash
from flash.audio import AudioClassificationData
from flash.core.data.utils import download_data
from flash.image import ImageClassifier

# 1. Create the DataModule: download the Urban 8k image archive into ./data,
#    then load the train/val folders with 64x64 spectrograms.
download_data(
    "https://pl-flash-data.s3.amazonaws.com/urban8k_images.zip",
    "./data",
)

datamodule = AudioClassificationData.from_folders(
    train_folder="data/urban8k_images/train",
    val_folder="data/urban8k_images/val",
    transform_kwargs={"spectrogram_size": (64, 64)},
    batch_size=4,
)

# 2. Build the model, using the labels exposed by the datamodule.
model = ImageClassifier(
    backbone="resnet18",
    labels=datamodule.labels,
)

# 3. Create the trainer (using every GPU torch reports) and finetune the model
gpu_count = torch.cuda.device_count()
trainer = flash.Trainer(max_epochs=3, gpus=gpu_count)

finetune_strategy = ("freeze_unfreeze", 1)
trainer.finetune(model, datamodule=datamodule, strategy=finetune_strategy)

# 4. Predict what's in a few images! air_conditioner, children_playing, siren, etc.
datamodule = AudioClassificationData.from_files(
    predict_files=[
        "data/urban8k_images/test/air_conditioner/13230-0-0-5.wav.jpg",
        "data/urban8k_images/test/children_playing/9223-2-0-15.wav.jpg",