Example #1
def get_train_transforms(
        config: object,
        transforms_set: TformsSet = TformsSet.Audtorch) -> object:
    if config.use_mels:
        if transforms_set == TformsSet.TorchAudio:
            trans = tforms_vision.Compose([
                tforms_torch.Resample(orig_freq=44100,
                                      new_freq=config.resampling_rate),
                tforms_torch.MelSpectrogram(sample_rate=config.resampling_rate,
                                            n_fft=config.n_fft,
                                            win_length=config.hop_length,
                                            hop_length=config.hop_length,
                                            f_min=float(config.fmin),
                                            f_max=float(config.fmax),
                                            pad=0,
                                            n_mels=config.n_mels),
                tforms_torch.AmplitudeToDB(stype='power', top_db=80),
                #tforms_aud.RandomCrop(config.max_length_frames),  # Raises "Can't call numpy() on Variable that requires grad. Use var.detach().numpy() instead."
            ])
        elif transforms_set == TformsSet.MySet:  # this works
            trans = tforms_aud.Compose([
                tforms_torch.Resample(orig_freq=44100,
                                      new_freq=config.resampling_rate),
                tforms_mine.Spectrogram(config),
                tforms_aud.RandomCrop(config.max_length_frames)
            ])
        else:
            raise ValueError('Unsupported transforms set: {}'.format(transforms_set))
    else:
        if transforms_set == TformsSet.TorchAudio:  # this works
            trans = tforms_aud.Compose([
                tforms_torch.Resample(orig_freq=44100,
                                      new_freq=config.resampling_rate),
                tforms_torch.Spectrogram(n_fft=config.n_fft,
                                         win_length=config.hop_length,
                                         hop_length=config.hop_length,
                                         pad=0,
                                         power=2,
                                         normalized=True),
                tforms_torch.AmplitudeToDB(stype='power', top_db=80),
                tforms_aud.RandomCrop(config.max_length_frames)
            ])
        elif transforms_set == TformsSet.MySet:  # this works
            trans = tforms_aud.Compose([
                tforms_torch.Resample(orig_freq=44100,
                                      new_freq=config.resampling_rate),
                tforms_mine.Spectrogram(config),
                tforms_aud.RandomCrop(config.max_length_frames)
            ])
        else:
            raise ValueError('Unsupported transforms set: {}'.format(transforms_set))
    return trans
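
# Usage sketch (not from the original snippet): the config fields below are
# assumptions inferred from the attribute accesses inside get_train_transforms.
from types import SimpleNamespace

example_config = SimpleNamespace(use_mels=True, resampling_rate=22050,
                                 n_fft=1024, hop_length=256, fmin=20,
                                 fmax=8000, n_mels=96, max_length_frames=256)
trans = get_train_transforms(example_config, transforms_set=TformsSet.TorchAudio)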
Example #2
if config.platform == 0:
    root = '/Volumes/scratch/work/falconr1/datasets/mtg-jamendo-dataset-master'
elif config.platform == 2:
    root = '/scratch/work/falconr1/datasets/mtg-jamendo-dataset-master'
elif config.platform == 3:
    root = '/m/cs/work/falconr1/datasets/mtg-jamendo-dataset-master'

split = 0
start_id = config.start_id * config.chunk_size
stop_id = start_id + config.chunk_size
full_output_path = os.path.join(root, config.output_path)

dataset = JamendoAudioFolder_audtorch(
    root,
    config.subset,
    split,
    config.mode,
    return_fname=True,
    transform=tforms2.Compose([
        tforms2.RandomCrop(config.max_length),
        tforms2.Downmix(1),
        tforms2.Normalize()
    ]),
)

## TODO use the audio dataset, not the npy

# dataset = JamendoAudioFolder_npy(root,
#                                  config.subset,
#                                  split,
#                                  config.mode,
#                                  trim_to_size=config.max_length,
#                                  return_fname=True,
#                                  transform=tforms2.Compose([
#                                            tforms2.Downmix(1),
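
# Usage sketch (not from the original snippet): iterate only the chunk
# delimited by start_id/stop_id; the (signal, label, fname) return signature
# is an assumption based on return_fname=True above.
for idx in range(start_id, min(stop_id, len(dataset))):
    signal, label, fname = dataset[idx]
    print(fname, signal.shape)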
Example #3
import numpy as np
import pandas as pd
import pytest

# `datasets` and `transforms` appear to come from audtorch (assumption);
# the snippet does not preserve its imports.
from audtorch import datasets, transforms


# Only the 'stdev' parametrization survived in the snippet; the other
# parameter values below are representative assumptions.
@pytest.mark.parametrize('duration', [0.5])
@pytest.mark.parametrize('sampling_rate', [44100])
@pytest.mark.parametrize('mean', [0])
@pytest.mark.parametrize('stdev', [1, 0.5])
def test_whitenoise(duration, sampling_rate, mean, stdev):
    dataset = datasets.WhiteNoise(duration=duration,
                                  sampling_rate=sampling_rate,
                                  mean=mean,
                                  stdev=stdev)
    noise, label = next(iter(dataset))
    samples = int(np.ceil(duration * sampling_rate))
    assert noise.shape == (1, samples)
    assert label == 'white noise'
    assert -1 <= np.max(np.abs(noise)) <= 1
    assert len(dataset) == 1


# --- datasets/utils.py ---
crop = transforms.RandomCrop(8192)
resamp1 = transforms.Resample(48000, 44100)
resamp2 = transforms.Resample(44100, 16000)
t1 = transforms.Compose([crop, resamp1])
t2 = transforms.Compose([crop, resamp1, resamp2])
t3 = transforms.Compose([resamp1, crop, resamp2])
d0 = datasets.WhiteNoise(duration=0.5, sampling_rate=48000, transform=crop)
d1 = datasets.WhiteNoise(duration=0.5, sampling_rate=48000, transform=t1)
d2 = datasets.WhiteNoise(duration=0.5, sampling_rate=48000, transform=t2)
d3 = datasets.WhiteNoise(duration=0.5, sampling_rate=48000, transform=t3)
df_empty = pd.DataFrame()
df_a = pd.DataFrame(data=[0], columns=['a'])
df_ab = pd.DataFrame(data=[('0', 1)], columns=['a', 'b'])


@pytest.mark.parametrize('list_of_datasets', [
Example #4
# Representative parametrizations (assumptions; the originals were not
# preserved in the snippet).
@pytest.mark.parametrize('input', [np.random.normal(size=(1, 16))])
@pytest.mark.parametrize('size', [8])
@pytest.mark.parametrize('axis', [-1])
def test_randomcrop(input, size, axis):
    t = transforms.RandomCrop(size, axis=axis)
    t.fix_randomization = True
    assert np.array_equal(t(input), t(input))
    assert np.array_equal(t(input), F.crop(input, t.idx, axis=t.axis))
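
# Usage sketch (not from the original snippet), assuming the transforms come
# from audtorch: with fix_randomization set, repeated calls reuse the same
# random crop offset, so both outputs are identical.
import numpy as np
from audtorch import transforms

t = transforms.RandomCrop(3)
t.fix_randomization = True
x = np.arange(10).reshape(1, -1)
assert np.array_equal(t(x), t(x))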
Example #5
def get_DataLoader(config):
    train_transforms = get_Transforms()

    if config.platform == 0:
        root = '/Volumes/scratch/work/falconr1/datasets/mtg-jamendo-dataset-master'
    elif config.platform == 2:
        root = '/scratch/work/falconr1/datasets/mtg-jamendo-dataset-master'
    elif config.platform == 3:
        root = '/m/cs/work/falconr1/datasets/mtg-jamendo-dataset-master'
    else:
        raise ValueError('Unknown platform: {}'.format(config.platform))

    subset = config.subset
    split = 0
    mode = 'train'


    if config.dataset == 'JamendoSpecFolder':
        dataset = JamendoSpecFolder(root,
                                    subset,
                                    split,
                                    mode,
                                    spec_folder='data/processed/spec_npy',
                                    transform=train_transforms)

    elif config.dataset == 'JamendoSpecHDF5':
        dataset = JamendoSpecHDF5(root,
                                  subset,
                                  split,
                                  mode,
                                  train_transforms,
                                  hdf5_filename='data/processed/jamendo.hdf5')
    elif config.dataset == 'JamendoSpecLMDB':
        dataset = JamendoSpecLMDB(root,
                                  subset,
                                  split,
                                  mode,
                                  train_transforms,
                                  lmdb_path='data/processed/triton')
    elif config.dataset == 'JamendoSpecLMDBsubdir':
        dataset = JamendoSpecLMDBsubdir(root,
                                        subset,
                                        split,
                                        mode,
                                        train_transforms,
                                        lmdb_path='data/processed/chunks')
    elif config.dataset == 'fake':
        dataset = dset.FakeData(image_size=(1, 96, 1366),
                                transform=transforms.Compose([
                                    transforms.RandomCrop((96, 256), pad_if_needed=True, padding_mode='reflect'),
                                    transforms.ToTensor()
                                ]))
    elif config.dataset == 'SVHN':
        dataset = dset.SVHN(root='/m/cs/work/falconr1/datasets/SVHN',
                            transform=transforms.Compose([
                                    transforms.RandomCrop((96, 256), pad_if_needed=True, padding_mode='reflect'),
                                    transforms.ToTensor()
                                ]),
                            download=True)
    elif config.dataset == 'JamendoAudioFolder_torchaudio':
        dataset = JamendoAudioFolder_torchaudio(root,
                                              subset,
                                              split,
                                              mode,
                                              transform=transforms.Compose([
                                                  tforms.MelSpectrogram(sr=44100,
                                                                        n_fft=512,
                                                                        ws=256,
                                                                        hop=256,
                                                                        f_min=20.0,
                                                                        f_max=8000,
                                                                        pad=0,
                                                                        n_mels=96),
                                                  transforms.ToPILImage(),
                                                  transforms.RandomCrop((96, 256), pad_if_needed=True, padding_mode='reflect'),
                                                  transforms.ToTensor(),
                                                ])
                                              )
    elif config.dataset == 'JamendoAudioFolder_audtorch':
        dataset = JamendoAudioFolder_audtorch(root,
                                              subset,
                                              split,
                                              mode,
                                              ## transform=tforms2.RandomCrop(size=256*44100),
                                              # transform=tforms2.Compose([
                                              #     tforms2.Downmix(1),
                                              #     tforms2.Normalize(),
                                              #     tforms2.Spectrogram(window_size=256,
                                              #                         hop_size=256,
                                              #                         fft_size=512),
                                              #     tforms2.Log(),
                                              #     # tforms2.LogSpectrogram(window_size=256,
                                              #     #                        hop_size=256,
                                              #     #                        normalize=True),
                                              #     myTforms.Debugger(),
                                              #     myTforms.CFL2FLC(),
                                              #     transforms.ToPILImage(),
                                              #     transforms.RandomCrop((96, 256), pad_if_needed=True, padding_mode='reflect'),
                                              #     transforms.ToTensor(),
                                              #   ])
                                              )
    elif config.dataset == 'JamendoAudioFolder_npy':
        dataset = JamendoAudioFolder_npy(root,
                                         subset,
                                         split,
                                         mode,
                                         trim_to_size=config.trim_size,
                                         ###transform=tforms2.Downmix(1),
                                         #transform=tforms2.RandomCrop(size=30*44100),
                                         # transform=tforms2.Compose([
                                         #     tforms2.Downmix(1),
                                         #     tforms2.Normalize(),
                                         #     tforms2.Spectrogram(window_size=256,
                                         #                         hop_size=256,
                                         #                         fft_size=512),
                                         #     tforms2.Log(),
                                         #     # tforms2.LogSpectrogram(window_size=256,
                                         #     #                        hop_size=256,
                                         #     #                        normalize=True),
                                         #     myTforms.Debugger(),
                                         #     myTforms.CFL2FLC(),
                                         #     transforms.ToPILImage(),
                                         #     transforms.RandomCrop((96, 256), pad_if_needed=True, padding_mode='reflect'),
                                         #     transforms.ToTensor(),
                                         #   ])
                                         )
    elif config.dataset == 'JamendoAudioFolder_torch':
        dataset = JamendoAudioFolder_torch(root,
                                         subset,
                                         split,
                                         mode,
                                         ###transform=tforms2.Downmix(1),
                                         transform=tforms2.RandomCrop(size=30*44100),
                                         # transform=tforms2.Compose([
                                         #     tforms2.Downmix(1),
                                         #     tforms2.Normalize(),
                                         #     tforms2.Spectrogram(window_size=256,
                                         #                         hop_size=256,
                                         #                         fft_size=512),
                                         #     tforms2.Log(),
                                         #     # tforms2.LogSpectrogram(window_size=256,
                                         #     #                        hop_size=256,
                                         #     #                        normalize=True),
                                         #     myTforms.Debugger(),
                                         #     myTforms.CFL2FLC(),
                                         #     transforms.ToPILImage(),
                                         #     transforms.RandomCrop((96, 256), pad_if_needed=True, padding_mode='reflect'),
                                         #     transforms.ToTensor(),
                                         #   ])
                                         )
    else:
        raise ValueError('Unknown dataset: {}'.format(config.dataset))

    subset_indices = np.random.choice(range(len(dataset)), config.data_limit, replace=False)

    print('------ Dataset length = {}, using {} samples.'.format(len(dataset), len(subset_indices)))

    if config.collate_fn == 'seq2seq':
        collate = Seq2Seq([-1, -1], batch_first=None, sort_sequences=False)
        #collate = Seq2Seq_short([-1, -1], batch_first=None, sort_sequences=False)
    else:
        collate = torch.utils.data.dataloader.default_collate

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=config.batch_size,
                                             # shuffle=True,  # mutually exclusive with the sampler below
                                             num_workers=config.num_workers,
                                             pin_memory=True,
                                             sampler=torch.utils.data.sampler.SubsetRandomSampler(subset_indices),
                                             collate_fn=collate,
                                             drop_last=True,
                                             )

    return dataloader
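
# Usage sketch (not from the original snippet): the config fields below are
# assumptions inferred from the attribute accesses inside get_DataLoader.
from types import SimpleNamespace

example_config = SimpleNamespace(platform=3, subset='autotagging_top50tags',
                                 dataset='fake', data_limit=100,
                                 collate_fn='default', batch_size=8,
                                 num_workers=0)
loader = get_DataLoader(example_config)
images, labels = next(iter(loader))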