def get_audio():
    """Capture audio through the notebook widget and decode it as WAV.

    Displays ``AUDIO_HTML``, reads the JavaScript value ``data`` through
    ``eval_js``, base64-decodes its second comma-separated field, pipes
    the bytes through ffmpeg to obtain WAV data and parses that with
    ``wav_read``.

    Returns
    -------
    tuple
        ``(audio, sr)``: the samples and the sample rate, in that order.
    """
    display(HTML(AUDIO_HTML))
    encoded = eval_js("data")
    payload = b64decode(encoded.split(',')[1])

    converter = ffmpeg.input('pipe:0').output('pipe:1', format='wav')
    process = converter.run_async(
        pipe_stdin=True,
        pipe_stdout=True,
        pipe_stderr=True,
        quiet=True,
        overwrite_output=True,
    )
    output, _ = process.communicate(input=payload)

    # Bytes 4:8 of the converted stream are replaced with the actual size
    # of the RIFF chunk (everything after the 8-byte header), written
    # least-significant byte first.
    riff_chunk_size = len(output) - 8
    size_field = bytes((riff_chunk_size >> (8 * k)) & 0xFF for k in range(4))
    riff = output[:4] + size_field + output[8:]

    sr, audio = wav_read(io.BytesIO(riff))
    return audio, sr
Example #2
0
    def load(path=None, subsample=1):
        """Load the Audio-MNIST recordings as one zero-padded array.

        Parameters
        ----------
        path: str (optional)
            directory holding the dataset, default ``$DATASET_PATH``; it is
            concatenated as-is with ``"audiomnist/data.zip"``
        subsample: int (optional)
            stride applied to every waveform (``wav[::subsample]``)

        Returns
        -------
        all_wavs: array
            one row per recording; each waveform is centred and zero padded
            to the length of the longest one
        digits: array
            first ``_``-separated field of each file name, as an integer
        speakers: array
            second ``_``-separated field of each file name, minus one
        """
        if path is None:
            path = os.environ["DATASET_PATH"]
        audiomnist.download(path)
        t0 = time.time()

        wavs = []
        digits = []
        speakers = []
        # The context manager closes the archive even when a file fails to
        # parse; the original left the handle open.
        with zipfile.ZipFile(path + "audiomnist/data.zip") as archive:
            for filename in tqdm(archive.namelist(), ascii=True):
                if ".wav" not in filename:
                    continue
                fields = filename.split("/")[-1].split("_")
                digits.append(int(fields[0]))
                speakers.append(int(fields[1]) - 1)
                raw = io.BytesIO(archive.read(filename))
                wavs.append(wav_read(raw)[1].astype("float32")[::subsample])

        longest = max((len(wav) for wav in wavs), default=0)
        all_wavs = np.zeros((len(wavs), longest))
        for row, wav in enumerate(wavs):
            left = (longest - len(wav)) // 2
            all_wavs[row, left:left + len(wav)] = wav

        print("Audio-MNIST loaded in {} s.".format(time.time() - t0))
        return all_wavs, np.array(digits), np.array(speakers)
Example #3
0
    def load(path=None):
        """Load the IRMAS training recordings with integer labels.

        The original body finished by reading ``labels`` and ``wavs`` before
        either was assigned, so every call raised ``UnboundLocalError``.
        The returned values are now derived from the training archive.

        Parameters
        ----------
        path: str (optional)
            directory holding the dataset, default ``$DATASET_PATH``; it is
            concatenated as-is with the ``irmas/...`` archive names

        Returns
        -------
        wavs: array
            the training waveforms as float32
        labels: array
            index of each recording's category in the sorted unique
            categories
        categories: array
            name of the directory containing each training file
        """
        if path is None:
            path = os.environ['DATASET_PATH']
        irmas.download(path)

        t0 = time.time()

        train_wavs = []
        train_labels = []

        test_wavs = []
        test_labels = []

        # loading the training set
        with zipfile.ZipFile(path + 'irmas/IRMAS-TrainingData.zip') as f:
            for filename in tqdm(f.namelist(), ascii=True):
                if '.wav' not in filename:
                    continue
                byt = io.BytesIO(f.read(filename))
                train_wavs.append(wav_read(byt)[1].astype('float32'))
                train_labels.append(filename.split('/')[-2])

        # NOTE(review): the testing archives are still parsed, as before, but
        # the three-value return has no slot for them -- decide whether to
        # expose test_wavs/test_labels or drop this loop.
        base = 'irmas/IRMAS-TestingData-Part{}.zip'
        for part in ['1', '2', '3']:
            with zipfile.ZipFile(path + base.format(part)) as f:
                for filename in tqdm(f.namelist(), ascii=True):
                    if '.wav' not in filename:
                        continue

                    byt = io.BytesIO(f.read(filename))
                    test_wavs.append(wav_read(byt)[1].astype('float32'))

                    # the label lives in a .txt file next to the .wav
                    byt = io.BytesIO(f.read(filename.replace('.wav', '.txt')))
                    test_labels.append(np.loadtxt(byt, dtype='str')[0])

        categories = np.array(train_labels)
        wavs = np.array(train_wavs)
        labels = np.zeros(len(categories))
        for i, c in enumerate(np.unique(categories)):
            labels[categories == c] = i

        print('Dataset IRMAS loaded in {0:.2f}s.'.format(time.time() - t0))
        return wavs, labels, categories
Example #4
0
def load_audio(
    path: Path,
    channel: Optional[int] = None,
    mmap: bool = False,
    channel_names: List[str] = ["left", "right"],
) -> MultiTrack:
    """Read an audio file into a ``MultiTrack`` of ``Wave`` objects.

    The file is parsed with ``wav_read``; when that raises ``ValueError``
    the optional ``soundfile`` package is tried instead (as int16).

    Args:
        path: file to read.
        channel: column to keep from a two-dimensional signal; ``None``
            keeps columns 0 and 1. For a one-dimensional signal only
            ``None`` or ``0`` is accepted.
        mmap: forwarded to ``wav_read``.
        channel_names: one or two track names, indexed by channel number.

    Returns:
        The populated ``MultiTrack``; every track gets ``min``/``max`` set
        from its dtype (integer range, or -1.0/1.0 for floats).

    Raises:
        ImportError: ``wav_read`` failed and ``soundfile`` is missing.
        RuntimeError: ``soundfile`` raised ``RuntimeError`` for the file.
        MultiChannelError: the requested channel is not available.
        NotImplementedError: the sample dtype is neither integer nor float.
    """
    multiTrack = MultiTrack()
    assert 0 < len(channel_names) <= 2
    try:
        fs, value = wav_read(path, mmap=mmap)
    except ValueError:
        # fall back to soundfile for files the first reader rejects
        try:
            import soundfile as sf

            value, fs = sf.read(path, dtype="int16")
        except ImportError:
            logging.error(
                f"Scipy was unable to import {path}, "
                f"try installing soundfile python package for more compatability"
            )
            raise ImportError
        except RuntimeError:
            raise RuntimeError(f"Unable to import audio file {path}")

    n_dims = value.ndim
    if n_dims == 1:
        if not (channel is None or channel == 0):
            raise MultiChannelError(
                f"cannot select channel {channel} from monaural file {path}")
        mono = Wave(value[:, np.newaxis], fs, path=path)
        multiTrack[channel_names[0]] = mono
    elif n_dims == 2:
        if channel is not None:
            try:
                picked = Wave(value[:, channel], fs, path=path)
                multiTrack[channel_names[channel]] = picked
            except IndexError:
                raise MultiChannelError(
                    f"cannot select channel {channel} from file "
                    f"{path} with {value.shape[1]} channels")
        else:
            for column in (0, 1):
                track = Wave(value[:, column], fs, path=path)
                multiTrack[channel_names[column]] = track

    # amplitude limits follow from the dtype of each stored track
    for key in multiTrack.keys():
        track = multiTrack[key]
        dtype = track.value.dtype

        if np.issubdtype(dtype, np.integer):
            limits = np.iinfo(dtype)
            track.min = limits.min
            track.max = limits.max
        elif np.issubdtype(dtype, np.floating):
            track.min = -1.0
            track.max = 1.0
        else:
            logging.error(f"Wave dtype {dtype} not supported")
            raise NotImplementedError
    return multiTrack
Example #5
0
def load(path=None):
    """Load the picidae recordings and their labels.

    Parameters
    ----------
        path: str (optional)
            default ($DATASET_PATH), the path to look for the data and
            where the data will be downloaded if not present

    Returns
    -------

        data: dict
            ``"wavs"``: list of float32 waveforms, one per ``.wav`` entry
            of the archive (entries containing ``"._"`` are skipped);
            ``"labels"``: int32 array, index of each name in the sorted
            unique names;
            ``"names"``: array with the second path component of each entry;
            ``"XC_identifiers"``: list with the text before the first ``-``
            in the third path component of each entry;
            ``"DOC"``: the module-level ``DOC`` object

    """

    if path is None:
        path = os.environ["DATASET_PATH"]

    download_dataset(path, _dataset, _urls, extract=True)

    t0 = time.time()

    wavs = []
    labels = []
    XC = []
    # the context manager closes the archive, which the original never did
    with zipfile.ZipFile(path + "picidae/PicidaeDataset.zip") as archive:
        for item in tqdm(archive.namelist(), ascii=True):
            if not item.endswith(".wav") or "._" in item:
                continue
            parts = item.split("/")
            byt = io.BytesIO(archive.read(item))
            wavs.append(wav_read(byt)[1].astype("float32"))
            labels.append(parts[1])
            XC.append(parts[2].split("-")[0])

    labels = np.array(labels)
    y = np.zeros(len(labels), dtype="int32")
    # np.unique already returns its values sorted
    for k, name in enumerate(np.unique(labels)):
        y[labels == name] = k

    data = {
        "wavs": wavs,
        "labels": y,
        "names": labels,
        "XC_identifiers": XC,
        "DOC": DOC,
    }

    print("Dataset picidae loaded in {0:.2f}s.".format(time.time() - t0))

    return data
Example #6
0
    def load(path=None):
        """Load the BirdVox-DCASE-20k recordings listed in the label file.

        Parameters
        ----------
            path: str (optional)
                default ($DATASET_PATH), the path to look for the data and
                where the data will be downloaded if not present

        Returns
        -------

            wavs: array
                the waveforms (float32) of the archive entries whose base
                name appears in the first column of ``data_labels.csv``

            labels: array
                int32 values taken from the third column of
                ``data_labels.csv`` for each returned waveform

        """

        if path is None:
            path = os.environ["DATASET_PATH"]

        birdvox_dcase_20k.download(path)

        t0 = time.time()

        basefile = path + "birdvox_dcase_20k/BirdVox-DCASE-20k.zip"
        table = np.loadtxt(
            path + "birdvox_dcase_20k/data_labels.csv",
            skiprows=1,
            delimiter=",",
            dtype="str",
        )

        # Name -> label lookup replaces the per-file linear list scans;
        # setdefault keeps the first row of a repeated name, which is what
        # list.index() returned.
        label_of = {}
        for wav_name, wav_label in zip(table[:, 0], table[:, 2].astype("int")):
            label_of.setdefault(str(wav_name), wav_label)

        wavs = []
        labels = []
        with zipfile.ZipFile(basefile) as f:
            for name in tqdm(f.namelist(), ascii=True):
                # base name without its 4-character extension
                filename = name.split("/")[-1][:-4]
                if ".wav" not in name or filename not in label_of:
                    continue
                byt = io.BytesIO(f.read(name))
                wavs.append(wav_read(byt)[1].astype("float32"))
                labels.append(label_of[filename])

        wavs = np.array(wavs).astype("float32")
        labels = np.array(labels).astype("int32")

        print("Dataset birdvox_dcase_20k loaded in {0:.2f}s.".format(
            time.time() - t0))

        return wavs, labels
Example #7
0
    def load(path=None, classes=range(10)):
        """Load the SONYC-UST development audio with fine and coarse labels.

        Parameters
        ----------
        path: str (optional)
            directory holding the dataset, default ``$DATASET_PATH``; it is
            concatenated as-is with the ``ust/...`` file names
        classes: iterable (optional)
            accepted for compatibility; not used by this function

        Returns
        -------
        wavs: array
            float32 array of shape (2794, 441000); shorter recordings are
            left zero padded at the end
        fine: array
            int32 array of shape (2794, 29), annotation columns 4 to 32
        coarse: array
            int32 array of shape (2794, 8); each column is the maximum of
            one group of fine labels

        Raises
        ------
        ValueError
            if a ``.wav`` member of the archive has no annotation row
        """
        if path is None:
            path = os.environ["DATASET_PATH"]
        sonycust.download(path)

        annotations = np.loadtxt(path + "ust/annotations-dev.csv",
                                 delimiter=",",
                                 skiprows=1,
                                 dtype="str")

        # Map "<column 0>/<column 2>" to the first annotation row carrying
        # it. The original built this list and then overwrote it with the
        # tar member names, so labels were looked up by position in the
        # archive listing instead of by annotation row.
        # NOTE(review): assumes tar member names use this exact
        # "<column 0>/<column 2>" form -- confirm against the archive.
        row_of = {}
        for row, (prefix, audio_name) in enumerate(
                zip(annotations[:, 0], annotations[:, 2])):
            row_of.setdefault(str(prefix) + "/" + str(audio_name), row)

        # get fine labels and limits for coarse classes
        fine_labels = annotations[:, 4:33].astype("float32").astype("int32")
        class_limits = [0, 4, 9, 10, 14, 19, 23, 28, 29]
        n_classes = len(class_limits) - 1
        llabels = np.zeros((len(annotations), n_classes), dtype="int")
        for k in range(n_classes):
            block = fine_labels[:, class_limits[k]:class_limits[k + 1]]
            llabels[:, k] = block.max(1)

        wavs = np.zeros((2794, 441000), dtype="float32")
        coarse = np.zeros((2794, 8), dtype="int32")
        fine = np.zeros((2794, 29), dtype="int32")
        cpt = 0
        with tarfile.open(path + "ust/audio-dev.tar.gz", "r:gz") as files:
            for name in tqdm(files.getnames(), ascii=True):
                if ".wav" not in name:
                    continue
                row = row_of.get(name)
                if row is None:
                    raise ValueError(
                        "{} has no row in annotations-dev.csv".format(name))
                # decode once (the original decoded every file twice)
                wav = wav_read(files.extractfile(name))[1].astype("float32")
                wavs[cpt, :len(wav)] = wav
                coarse[cpt] = llabels[row]
                fine[cpt] = fine_labels[row]
                cpt += 1
        return wavs, fine, coarse
Example #8
0
    def load(path=None):
        """Load the sustained-vowel recordings of VocalSet.

        Archive entries are skipped when they are not ``.wav`` files, when
        their path contains ``"excerpts"``, when it starts with ``"_"``, or
        when the character before the extension is not one of a/e/i/o/u.

        Parameters
        ----------

        path: str (optional)
            a string where to load the data and download if not present

        Returns
        -------

        singers: list
            the list of singers as strings, 11 males and 9 females as in male1,
            male2, ...

        genders: list
            the list of genders of the singers as in male, male, female, ...

        vowels: list
            the vowels being pronounced

        data: list
            the list of waveforms, not all equal length

        """
        if path is None:
            path = os.environ["DATASET_PATH"]

        vocalset.download(path)

        singers = []
        genders = []
        vowels = []
        data = []
        # the context manager closes the archive, which the original never did
        with zipfile.ZipFile(path + "vocalset/VocalSet11.zip") as f:
            for filename in tqdm(f.namelist(), ascii=True):
                if (".wav" not in filename or "excerpts" in filename
                        or "_" == filename[0]):
                    continue
                # the vowel is the last character before ".wav"
                vowel = filename[-5]
                if vowel not in ["a", "e", "i", "o", "u"]:
                    continue
                vowels.append(vowel)
                bytes_ = io.BytesIO(f.read(filename))
                data.append(wav_read(bytes_)[1].astype("float32"))
                singer = filename.split("/")[1]
                # gender is the singer name with its digits stripped
                genders.append("".join(x for x in singer if x.isalpha()))
                singers.append(singer)

        return singers, genders, vowels, data
Example #9
0
def record_audio():
    """Record one chunk with ``arecord`` and return its samples.

    The device, sample format, rate, channel count and duration come from
    ``config.audio``. The recording is written to ``chunk.wav`` in the
    current directory and read back with ``wav_read``; the sample rate
    reported by the reader is discarded.
    """
    wav_file = 'chunk.wav'
    command = [
        'arecord',
        '-D', f'hw:{config.audio.device}',
        '-f', f'{config.audio.format}',
        '-r', f'{config.audio.sample_rate}',
        '-c', f'{config.audio.channels}',
        '-d', f'{config.audio.chunk_duration}',
        wav_file,
    ]
    # check=True makes a failing arecord raise instead of returning stale data
    run(command, check=True)
    _, data = wav_read(wav_file)
    return data
Example #10
0
    def load(path=None):
        """Load the speech commands (v0.01) recordings.

        Members whose name contains ``"noise"`` are returned unpadded as
        noises; every other ``wav`` member is zero padded symmetrically to
        16000 samples.

        Parameters
        ----------
        path: str (optional)
            directory holding the dataset, default ``$DATASET_PATH``

        Returns
        -------
        data: dict
            ``"wavs"``: float32 array of the padded recordings;
            ``"labels"``: int32 index of each name in the sorted unique names;
            ``"names"``: array with the parent directory of each recording;
            ``"noises"``: list of noise waveforms;
            ``"noises_labels"``: list of noise file names;
            ``"INFOS"``: ``speech_commands.__doc__``
        """
        if path is None:
            path = os.environ["DATASET_PATH"]
        speech_commands.download(path)

        t0 = time.time()

        print("Loading speech command")

        wavs = []
        labels = []
        noises = []
        noise_labels = []
        archive = path + "speech_commands/speech_commands_v0.01.tar.gz"
        # the context manager closes the tar file, which the original never did
        with tarfile.open(archive, "r:gz") as tar:
            for member in tqdm(tar.getmembers(), ascii=True):
                if "wav" not in member.name:
                    continue
                wav = wav_read(tar.extractfile(member.name))[1]
                if "noise" in member.name:
                    noises.append(wav)
                    noise_labels.append(member.name.split("/")[-1])
                else:
                    left = 16000 - len(wav)
                    to_pad = left // 2
                    wavs.append(np.pad(wav, [[to_pad, left - to_pad]]))
                    labels.append(member.name.split("/")[-2])

        labels = np.array(labels)
        unique_labels = np.unique(labels)
        y = np.squeeze(
            np.array(
                [np.nonzero(label == unique_labels)[0] for label in labels]
            ).astype("int32")
        )

        data = {
            "wavs": np.array(wavs).astype("float32"),
            "labels": y,
            "names": labels,
            "noises": noises,
            # the original referenced the undefined name `noises_labels`
            "noises_labels": noise_labels,
            "INFOS": speech_commands.__doc__,
        }

        print("Dataset speech commands loaded in{0:.2f}s.".format(time.time() - t0))

        return data
Example #11
0
def get_wav_duration(file: str) -> float:
    """
    Calc duration of wave file
    :param file: file path
    :return: wave duration in seconds, or -1 when the file cannot be read
    """
    try:
        sr, wav = wav_read(file)
        dur = len(wav) / sr
    except Exception:
        # Best effort: any read or parse failure is reported as -1.
        # Catching Exception rather than everything lets KeyboardInterrupt
        # and SystemExit propagate instead of being swallowed.
        dur = -1
    return dur
Example #12
0
    def read_wav(cls, path, channel=None, mmap=False):
        """Load a single-channel waveform from ``path`` as a ``Wave``.

        The file is parsed with ``wav_read``; when that raises
        ``ValueError`` the optional ``soundfile`` package is tried instead
        (as int16).

        Args:
            path: file to read.
            channel: column to keep from a two-dimensional signal
                (required in that case); for a one-dimensional signal only
                ``None`` or ``0`` is accepted.
            mmap: forwarded to ``wav_read``.

        Returns:
            A ``Wave`` whose ``min``/``max`` are the limits of its dtype
            (int16, int32, uint8) or -1.0/1.0 for float32/float64.

        Raises:
            ImportError: ``wav_read`` failed and ``soundfile`` is missing.
            RuntimeError: ``soundfile`` raised ``RuntimeError`` for the file.
            MultiChannelError: the channel selection is missing or invalid.
            NotImplementedError: the sample dtype is not supported.
        """
        try:
            fs, value = wav_read(path, mmap=mmap)
        except ValueError:
            try:
                import soundfile as sf

                value, fs = sf.read(path, dtype="int16")
            except ImportError:
                logging.error(
                    f"Scipy was unable to import {path}, "
                    f"try installing soundfile python package for more compatability"
                )
                raise ImportError
            except RuntimeError:
                raise RuntimeError(f"Unable to import audio file {path}")
        if value.ndim == 1:
            if channel is not None and channel != 0:
                raise MultiChannelError(
                    f"cannot select channel {channel} from monaural file {path}"
                )
        if value.ndim == 2:
            if channel is None:
                raise MultiChannelError(
                    f"must select channel when loading file {path} with {value.shape[1]} channels"
                )
            try:
                value = value[:, channel]
            except IndexError:
                raise MultiChannelError(
                    f"cannot select channel {channel} from file "
                    f"{path} with {value.shape[1]} channels")
        wav = Wave(value, fs, path=path)
        if value.dtype == numpy.dtype(numpy.int16):
            # int16 spans -32768..32767; the original had the two bounds
            # swapped in magnitude (-32767 / 32768)
            wav.min = -32768
            wav.max = 32767
        elif value.dtype == numpy.dtype(numpy.int32):
            wav.min = -2147483648
            wav.max = 2147483647
        elif value.dtype == numpy.dtype(numpy.uint8):
            wav.min = 0
            wav.max = 255
        elif value.dtype in set(
            [numpy.dtype(numpy.float64),
             numpy.dtype(numpy.float32)]):
            wav.max = 1.0
            wav.min = -1.0
        else:
            logging.error(f"Wave dtype {value.dtype} not supported")
            raise NotImplementedError
        return wav
Example #13
0
def load(path=None):
    """music genre classification

    This dataset was used for the well known paper in genre classification
    "Musical genre classification of audio signals" by G. Tzanetakis
    and P. Cook in IEEE Transactions on Audio and Speech Processing 2002.

    Unfortunately the database was collected gradually and very early on in my
    research so I have no titles (and obviously no copyright permission etc).
    The files were collected in 2000-2001 from a variety of sources including
    personal CDs, radio, microphone recordings, in order to represent a variety
    of recording conditions. Nevertheless I have been providing it to researchers
    upon request mainly for comparison purposes etc. Please contact George
    Tzanetakis ([email protected]) if you intend to publish experimental results
    using this dataset.

    There are some practical and conceptual issues with this dataset, described
    in "The GTZAN dataset: Its contents, its faults, their effects on
    evaluation, and its future use" by B. Sturm on arXiv 2013.

    Parameters
    ----------
    path: str (optional)
        directory holding the dataset, default ``$DATASET_PATH``; it is
        concatenated as-is with ``"gtzan/genres.tar.gz"``

    Returns
    -------
    data: dict
        ``"wavs"``: float32 array with every song truncated to the length
        of the shortest one; ``"labels"``: int32 array obtained by passing
        the second path component of each member through
        ``gtzan.name2class``
    """

    if path is None:
        path = os.environ["DATASET_PATH"]

    download_dataset(path, "gtzan", _urls)
    print("Loading gtzan")
    t0 = time.time()

    train_songs = []
    train_labels = []
    # the context manager closes the tar file, which the original never did
    with tarfile.open(path + "gtzan/genres.tar.gz", "r:gz") as tar:
        for member in tqdm(tar.getmembers(), ascii=True, total=1000):
            if "wav" not in member.name:
                continue
            f = tar.extractfile(member.name)
            train_songs.append(wav_read(f)[1])
            genre = member.name.split("/")[1]
            train_labels.append(gtzan.name2class[genre])

    # songs differ in length: cut all of them to the shortest one
    N = np.min([len(w) for w in train_songs])
    train_songs = [w[:N] for w in train_songs]

    train_songs = np.stack(train_songs).astype("float32")
    train_labels = np.array(train_labels).astype("int32")

    print("Dataset gtzan loaded in{0:.2f}s.".format(time.time() - t0))
    data = {"wavs": train_songs, "labels": train_labels}
    return data
Example #14
0
    def load(path=None):
        """Load the picidae recordings and their labels.

        Parameters
        ----------
            path: str (optional)
                default ($DATASET_PATH), the path to look for the data and
                where the data will be downloaded if not present

        Returns
        -------

            wavs: list
                the float32 waveforms, one per ``.wav`` entry of the archive
                (entries containing ``"._"`` are skipped)

            y: array
                int32 index of each name in the sorted unique names

            labels: array
                the second path component of each entry

            XC: list
                the text before the first ``-`` in the third path component
                of each entry

        """

        if path is None:
            path = os.environ['DATASET_PATH']

        picidae.download(path)

        t0 = time.time()

        wavs = []
        labels = []
        XC = []
        # the context manager closes the archive, which the original never did
        with zipfile.ZipFile(path + 'picidae/PicidaeDataset.zip') as archive:
            for item in tqdm(archive.namelist(), ascii=True):
                if not item.endswith('.wav') or '._' in item:
                    continue
                parts = item.split('/')
                byt = io.BytesIO(archive.read(item))
                wavs.append(wav_read(byt)[1].astype('float32'))
                labels.append(parts[1])
                XC.append(parts[2].split('-')[0])

        labels = np.array(labels)
        y = np.zeros(len(labels), dtype='int32')
        # np.unique already returns its values sorted
        for k, name in enumerate(np.unique(labels)):
            y[labels == name] = k

        print('Dataset picidae loaded in {0:.2f}s.'.format(time.time() - t0))

        return wavs, y, labels, XC
Example #15
0
def load(path=None):
    """
    digit recognition
        https://github.com/soerenab/AudioMNIST

    A simple audio/speech dataset consisting of recordings of spoken digits in
    wav files at 8kHz. The recordings are trimmed so that they have near
    minimal silence at the beginnings and ends.

    FSDD is an open dataset, which means it will grow over time as data is
    contributed. In order to enable reproducibility and accurate citation the
    dataset is versioned using Zenodo DOI as well as git tags.

    Current status

        4 speakers
        2,000 recordings (50 of each digit per speaker)
        English pronunciations

    Parameters
    ----------
    path: str (optional)
        directory holding the dataset, default ``$DATASET_PATH``

    Returns
    -------
    all_wavs: array
        one row per recording; each waveform is centred and zero padded to
        the length of the longest one
    digits: array
        first ``_``-separated field of each file name, as an integer
    speakers: array
        second ``_``-separated field of each file name, minus one
    """
    if path is None:
        path = os.environ["DATASET_PATH"]
    download_dataset(path, _dataset, _urls)

    t0 = time.time()

    wavs = []
    digits = []
    speakers = []
    # the context manager closes the archive, which the original never did
    with zipfile.ZipFile(os.path.join(path, _dataset, "data.zip")) as f:
        for filename in tqdm(f.namelist(), ascii=True):
            if ".wav" not in filename:
                continue
            fields = filename.split("/")[-1].split("_")
            digits.append(int(fields[0]))
            speakers.append(int(fields[1]) - 1)
            byt = io.BytesIO(f.read(filename))
            wavs.append(wav_read(byt)[1].astype("float32"))

    longest = max((len(wav) for wav in wavs), default=0)
    all_wavs = np.zeros((len(wavs), longest))
    for row, wav in enumerate(wavs):
        left = (longest - len(wav)) // 2
        all_wavs[row, left:left + len(wav)] = wav

    print("Audio-MNIST loaded in {} s.".format(time.time() - t0))
    return all_wavs, np.array(digits), np.array(speakers)
Example #16
0
    def load(path=None, subsample=1):
        """ESC 50.

        https://github.com/karolpiczak/ESC-50#download


        Parameters
        ----------
            path: str (optional)
                default ($DATASET_PATH), the path to look for the data and
                where the data will be downloaded if not present
            subsample: int (optional)
                stride applied to every waveform (``wav[::subsample]``).
                The original body used this name without defining it, so
                every call raised ``NameError``; the default keeps the
                full-rate signal.

        Returns
        -------
            all_wavs: array
                one row per line of ``esc50.csv`` (same order); each
                waveform is centred and zero padded to the longest one
            fine_labels: array
                int32, third column of ``esc50.csv``
            coarse_labels: array
                int32, ``esc50.fine_to_coarse`` applied to the categories
            categories: array
                fourth column of ``esc50.csv``
        """

        if path is None:
            path = os.environ['DATASET_PATH']
        esc50.download(path)

        wavs = []
        order = []
        # the context manager closes the archive, which the original never did
        with zipfile.ZipFile(path + 'esc50/master.zip') as f:
            meta = np.loadtxt(
                io.BytesIO(f.read('ESC-50-master/meta/esc50.csv')),
                delimiter=',',
                skiprows=1,
                dtype='str')
            filenames = list(meta[:, 0])

            for filename in f.namelist():
                if '.wav' not in filename:
                    continue
                byt = io.BytesIO(f.read(filename))
                wavs.append(wav_read(byt)[1].astype('float32')[::subsample])
                # row of the metadata table describing this recording
                order.append(filenames.index(filename.split('/')[-1]))

        fine_labels = meta[:, 2].astype('int32')
        categories = meta[:, 3]
        coarse_labels = np.array([esc50.fine_to_coarse[c]
                                  for c in categories]).astype('int32')

        longest = max((len(wav) for wav in wavs), default=0)
        all_wavs = np.zeros((len(wavs), longest))
        for row, wav in zip(order, wavs):
            left = (longest - len(wav)) // 2
            all_wavs[row, left:left + len(wav)] = wav
        return all_wavs, fine_labels, coarse_labels, categories
Example #17
0
def load(path=None):
    """Binary audio classification, presence or absence of a bird.

    `Warblr <http://machine-listening.eecs.qmul.ac.uk/bird-audio-detection-challenge/#downloads>`_
    comes from a UK bird-sound crowdsourcing
    research spinout called Warblr. From this initiative we have
    10,000 ten-second smartphone audio recordings from around the UK.
    The audio totals around 44 hours duration. The audio will be
    published by Warblr under a Creative Commons licence. The audio
    covers a wide distribution of UK locations and environments, and
    includes weather noise, traffic noise, human speech and even human
    bird imitations. It is directly representative of the data that is
    collected from a mobile crowdsourcing initiative.

    Parameters
    ----------
    path: str (optional)
        directory holding the dataset, default ``$DATASET_PATH``; it is
        concatenated as-is with the ``warblr/...`` file names

    Returns
    -------
    dataset: dict
        ``"wavs"``: list of float32 arrays, each with a leading axis of
        size 1, in the order of the metadata file; ``"labels"``: int32
        array taken from the second metadata column
    """

    if path is None:
        path = os.environ["DATASET_PATH"]

    download_dataset(path, _name, _urls)

    print("Loading warblr")
    t = time.time()

    # Loading Labels
    labels = np.loadtxt(
        path + "warblr/warblrb10k_public_metadata.csv",
        delimiter=",",
        skiprows=1,
        dtype="str",
    )
    # Loading the files; the context manager closes the archive, which the
    # original never did
    wavs = []
    with zipfile.ZipFile(path + "warblr/warblrb10k_public_wav.zip") as f:
        for files_ in tqdm(labels, ascii=True):
            # the first metadata column is the file's base name
            byt = io.BytesIO(f.read("wav/" + files_[0] + ".wav"))
            wavs.append(np.expand_dims(wav_read(byt)[1].astype("float32"), 0))
    labels = labels[:, 1].astype("int32")

    print("Dataset warblr loaded in", "{0:.2f}".format(time.time() - t), "s.")
    dataset = {"wavs": wavs, "labels": labels}
    return dataset
Example #18
0
 def __init__(self,
              wavfile,
              fs,
              windowlen,
              slidelen,
              fft_n=512,
              mel_n=25,
              p=13):
     """Store the signal and the analysis parameters.

     Args:
         wavfile: path of a WAV file (``str``), or the samples themselves
             as an array-like.
         fs: sample rate; used only when ``wavfile`` is not a path,
             otherwise the rate read from the file is kept.
         windowlen: stored as ``self.windowlen``.
         slidelen: stored as ``self.slidelen``.
         fft_n: stored as ``self.fft_n``.
         mel_n: stored as ``self.mel_n``.
         p: stored as ``self.p``.
     """
     if isinstance(wavfile, str):
         self.fs, self.signal = wav_read(wavfile)
     else:
         self.fs = fs
         self.signal = np.asarray(wavfile)
     self.windowlen = windowlen
     self.slidelen = slidelen
     self.fft_n = fft_n
     # the original hard-coded 25 here and silently ignored the argument
     self.mel_n = mel_n
     self.p = p
Example #19
0
    def load(path=None, classes=range(10)):
        """Load the SONYC-UST development audio with coarse labels.

        Parameters
        ----------
        path: str (optional)
            directory holding the dataset, default ``$DATASET_PATH``; it is
            concatenated as-is with the ``ust/...`` file names
        classes: iterable (optional)
            accepted for compatibility; not used by this function

        Returns
        -------
        wavs: array
            array of shape (2794, 441000); shorter recordings are left zero
            padded at the end
        labels: array
            integer array of shape (2794, 8); each column is the maximum of
            one group of fine labels (annotation columns 4 to 32)

        Raises
        ------
        ValueError
            if a ``.wav`` member of the archive has no annotation row
        """
        if path is None:
            path = os.environ['DATASET_PATH']
        download(path)

        annotations = np.loadtxt(path + 'ust/annotations-dev.csv',
                                 delimiter=',',
                                 skiprows=1,
                                 dtype='str')

        # Map "<column 0>/<column 2>" to the first annotation row carrying
        # it. The original built this list and then overwrote it with the
        # tar member names, so labels were looked up by position in the
        # archive listing instead of by annotation row.
        # NOTE(review): assumes tar member names use this exact
        # "<column 0>/<column 2>" form -- confirm against the archive.
        row_of = {}
        for row, (prefix, audio_name) in enumerate(
                zip(annotations[:, 0], annotations[:, 2])):
            row_of.setdefault(str(prefix) + '/' + str(audio_name), row)

        # get fine labels and limits for coarse classes
        fine_labels = annotations[:, 4:33].astype('float32').astype('int32')
        class_limits = [0, 4, 9, 10, 14, 19, 23, 28, 29]
        n_classes = len(class_limits) - 1
        llabels = np.zeros((len(annotations), n_classes), dtype='int')
        for k in range(n_classes):
            block = fine_labels[:, class_limits[k]:class_limits[k + 1]]
            llabels[:, k] = block.max(1)

        wavs = np.zeros((2794, 441000))
        labels = np.zeros((2794, n_classes)).astype('int')
        cpt = 0
        # the context manager closes the tar file, which the original never did
        with tarfile.open(path + 'ust/audio-dev.tar.gz', 'r:gz') as files:
            for name in tqdm(files.getnames(), ascii=True):
                if '.wav' not in name:
                    continue
                row = row_of.get(name)
                if row is None:
                    raise ValueError(
                        '{} has no row in annotations-dev.csv'.format(name))
                wav = wav_read(files.extractfile(name))[1].astype('float32')
                wavs[cpt, :len(wav)] = wav
                labels[cpt] = llabels[row]
                cpt += 1
        return wavs, labels
Example #20
0
def load_DCLDE(window_size=441000, PATH=None):
    """Load the DCLDE low-frequency development recordings as windows.

    The archive is downloaded to ``PATH + 'DCLDE/DCLDE_LF_Dev.zip'`` when
    it is not already there. Only entries whose name contains both
    ``.wav`` and ``.d100.`` are read.

    Parameters
    ----------
    window_size: int (optional)
        number of samples per window; trailing samples that do not fill a
        whole window are dropped
    PATH: str (optional)
        directory holding the dataset, default ``$DATASET_PATH``

    Returns
    -------
    dataset: Dataset
        the windows are registered through ``add_variable`` under
        ``signals`` / ``train_set`` with an extra axis inserted at
        position 1
    """
    if PATH is None:
        PATH = os.environ['DATASET_PATH']
    # NOTE(review): the metadata below and the final message name
    # "freefield1010" and bird classes -- this looks copied from another
    # loader; confirm before changing, callers may rely on it.
    dict_init = [('sampling_rate', 44100), ("n_classes", 2), ("path", PATH),
                 ("name", "freefield1010"), ('classes', ["no bird", "bird"])]
    dataset = Dataset(**dict(dict_init))

    print("Loading DCLDE")
    t0 = time.time()

    archive_path = PATH + 'DCLDE/DCLDE_LF_Dev.zip'
    if not os.path.isdir(PATH + 'DCLDE'):
        print('\tCreating Directory')
        os.mkdir(PATH + 'DCLDE')
    if not os.path.exists(archive_path):
        url = 'http://sabiod.univ-tln.fr/workspace/DCLDE2018/DCLDE_LF_Dev.zip'
        # The original bound the progress bar to `t`, clobbering the start
        # time and making the final elapsed-time print raise TypeError
        # whenever a download had happened.
        # NOTE(review): the bar is not passed to urlretrieve as a
        # reporthook, so it presumably never advances -- confirm.
        with DownloadProgressBar(unit='B', unit_scale=True, miniters=1,
                                 desc='Wav files'):
            urllib.request.urlretrieve(url, archive_path)

    # Loading the files; the context manager closes the archive, which the
    # original never did
    wavs = []
    with zipfile.ZipFile(archive_path) as f:
        for zipf in tqdm(f.filelist, ascii=True):
            if '.wav' in zipf.filename and '.d100.' in zipf.filename:
                byt = io.BytesIO(f.read(zipf))
                wav = wav_read(byt)[1].astype('float32')
                # cut the recording into consecutive full windows
                for s in range(len(wav) // window_size):
                    wavs.append(wav[s * window_size:(s + 1) * window_size])
    wavs = np.expand_dims(np.asarray(wavs), 1)
    dataset.add_variable({'signals': {'train_set': wavs}})

    print('Dataset freefield1010 loaded in', '{0:.2f}'.format(time.time() - t0), 's.')
    return dataset
Example #21
0
    def __init__(self, channel_file_name):
        """Read one channel's WAV file and derive its note lists.

        ``channel_file_name`` is looked up inside ``input_files_folder``
        under the current directory. A missing file is only reported on
        standard output; the note computations below still run.
        """
        self.file_name = channel_file_name
        # display name: the file name without its last four characters
        self.file_name_formatted = channel_file_name[:-4].capitalize()
        try:
            # SciPy warns about non-data blocks such as the header; those
            # warnings are not a problem here, so they are silenced
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                wav_location = "/".join(
                    (".", input_files_folder, channel_file_name))
                loaded = wav_read(wav_location)
                self.sample_rate, self.frames_array = loaded
                # duration of one sample: reciprocal of the sample rate
                self.timing = 1 / self.sample_rate
        except FileNotFoundError:
            print(
                "Oops, I thought I found a file, but it seems it does not exist... \nIf you are seeing this, something went pretty wrong, "
                "but i'm continuing anyway")
        self.note_indices = self.get_note_indices()

        # first pass: frame positions of all notes from the start of the list
        framing = self.get_note_framing_list(0)
        self.note_framing_list = framing
        self.note_timing_list = self.get_note_timing_list(
            self.note_framing_list)
Example #22
0
    def redraw():
        """Clears and re-draws WAV plot with trim lines."""
        nonlocal sample_rate
        plot.cla()

        if current_path:
            wav_path = input_dir / current_path
            _LOGGER.debug("Loading %s", wav_path)
            sample_rate, wav_data = wav_read(str(wav_path))
            audio = wav_data[:, 0]
            plot.plot(audio, color="blue")
            plot.set_xlim(0, len(audio))

            # Trim lines
            if left_cut is not None:
                plot.axvline(linewidth=2, x=left_cut, color="red")

            if right_cut is not None:
                plot.axvline(linewidth=2, x=right_cut, color="green")

        canvas.draw()
Example #23
0
    def read_wav(cls, path, channel=None, mmap=False):
        """Load a waveform from ``path`` as a ``Wave``.

        The file is parsed with ``wav_read`` and a one-dimensional result
        is reshaped to a single column. When ``wav_read`` raises
        ``ValueError`` the optional ``soundfile`` package is tried, with a
        dtype chosen from the file's subtype.

        Args:
            path: file to read.
            channel: column to keep; ``None`` keeps every column.
            mmap: forwarded to ``wav_read``; ignored (with a warning) on
                the ``soundfile`` path.

        Returns:
            The ``Wave``, or ``None`` when the fallback reader is missing
            or cannot open the file.
        """
        try:
            fs, value = wav_read(path, mmap=mmap)
            if np.ndim(value) == 1:
                value = value.reshape(-1, 1)
        except ValueError:
            try:
                if mmap:
                    logger.warning(
                        "mmap is not supported by soundfile, ignoring")
                import soundfile as sf

                # subtype -> numpy dtype; anything unlisted is read as float64
                audioEncodings: DefaultDict[str, str] = defaultdict(
                    lambda: "float64")
                audioEncodings[
                    "PCM_S8"] = "int16"  # soundfile does not support int8
                audioEncodings[
                    "PCM_U8"] = "int16"  # soundfile does not support uint8
                audioEncodings["PCM_16"] = "int16"
                audioEncodings["PCM_24"] = "int32"  # there is no np.int24
                audioEncodings["PCM_32"] = "int32"
                audioEncodings["FLOAT"] = "float32"
                audioEncodings["DOUBLE"] = "float64"
                file_info = sf.info(path)

                value, fs = sf.read(path,
                                    dtype=audioEncodings[file_info.subtype],
                                    always_2d=True)
            except ImportError:
                logger.error(
                    "Install soundfile for greater audio file compatability")
                # Without this return the code fell through to an unbound
                # `value`/`fs` and raised UnboundLocalError; mirror the
                # RuntimeError branch instead.
                return None
            except RuntimeError:
                logger.error("Soundfile was unable to open file")
                return None

        if channel is not None:
            value = value[:, channel]
        wav = Wave(value, fs, path=path)
        return wav
Example #24
0
 def load(path=None):
     """Load the speech commands (v0.01) recordings.

     Members whose name contains ``'noise'`` are returned unpadded as
     noises; every other ``wav`` member is zero padded symmetrically to
     16000 samples.

     Parameters
     ----------
     path: str (optional)
         directory holding the dataset, default ``$DATASET_PATH``

     Returns
     -------
     wavs: array
         float32 array of the padded recordings
     y: array
         int32 index of each name in the sorted unique names
     labels: array
         parent directory name of each recording
     noises: list
         the noise waveforms
     noise_labels: list
         the noise file names
     """
     if path is None:
         path = os.environ['DATASET_PATH']
     speech_commands.download(path)

     t0 = time.time()

     print('Loading speech command')

     wavs = []
     labels = []
     noises = []
     noise_labels = []
     archive = path+'speech_commands/speech_commands_v0.01.tar.gz'
     # the context manager closes the tar file, which the original never did
     with tarfile.open(archive, 'r:gz') as tar:
         for member in tqdm(tar.getmembers(), ascii=True):
             if 'wav' not in member.name:
                 continue
             wav = wav_read(tar.extractfile(member.name))[1]
             if 'noise' in member.name:
                 noises.append(wav)
                 noise_labels.append(member.name.split('/')[-1])
             else:
                 left = 16000 - len(wav)
                 to_pad = left // 2
                 wavs.append(np.pad(wav, [[to_pad, left - to_pad]]))
                 labels.append(member.name.split('/')[-2])

     labels = np.array(labels)
     unique_labels = np.unique(labels)
     y = np.squeeze(np.array([np.nonzero(label == unique_labels)[0]
                     for label in labels]).astype('int32'))

     print('Dataset speech commands loaded in{0:.2f}s.'.format(time.time()-t0))

     return np.array(wavs).astype('float32'), y, labels, noises, noise_labels
Example #25
0
 def load(path=None, subsample=1):
     """Load the freefield1010 recordings and their labels.

     Parameters
     ----------
     path: str (optional)
         directory holding the dataset; defaults to ``$DATASET_PATH``
         (``$DATASET_path``, the spelling the original read, is still
         honoured as a fallback)
     subsample: int (optional)
         stride applied to every waveform (``wav[::subsample]``). The
         original body used this name without defining it, so every call
         raised ``NameError``; the default keeps the full-rate signal.

     Returns
     -------
     wavs: array
         float32 array with one row per metadata line
     labels: array
         second column of the metadata file
     """
     if path is None:
         # prefer the conventional upper-case variable, but keep the
         # original spelling working for existing setups
         path = os.environ.get('DATASET_PATH')
         if path is None:
             path = os.environ['DATASET_path']

     freefield1010.download(path)

     # load labels
     labels = np.loadtxt(path+'freefield1010/ff1010bird_metadata.csv',
             delimiter=',',skiprows=1,dtype='int32')
     N = labels.shape[0]
     # number of samples kept by [::subsample] on a 441000-sample signal
     # (441000//subsample is one short when the stride does not divide it)
     width = len(range(0, 441000, subsample))
     wavs = np.empty((N, width), dtype='float32')
     # load wavs; the context manager closes the archive, which the
     # original never did
     with zipfile.ZipFile(path+'freefield1010/ff1010bird_wav.zip') as f:
         for i, files_ in tqdm(enumerate(labels[:,0]),ascii=True, total=N):
             byt = io.BytesIO(f.read('wav/'+str(files_)+'.wav'))
             wavs[i] = wav_read(byt)[1].astype('float32')[::subsample]

     labels = labels[:,1]

     return wavs, labels
Example #26
0
def load(path=None):
    """Audio binary classification, presence or absence of bird songs.

    `freefield1010 <http://machine-listening.eecs.qmul.ac.uk/bird-audio-detection-challenge/#downloads>`_
    is a collection of over 7,000 excerpts from field recordings
    around the world, gathered by the FreeSound project, and then standardised
    for research. This collection is very diverse in location and environment,
    and for the BAD Challenge we have newly annotated it for the
    presence/absence of birds.

    Parameters
    ----------

    path: str (optional)
        prefix under which ``freefield1010/`` is looked up (default
        ``$DATASET_PATH``). It is concatenated as-is with the file names,
        so it should end with a path separator.

    Returns
    -------

    data: dict
        ``"wavs"``: float32 array of shape ``(N, 441000)``, one recording
        per row; ``"labels"``: int32 vector holding the second column of
        ``ff1010bird_metadata.csv``, aligned with the rows of ``"wavs"``.
    """

    if path is None:
        path = os.environ["DATASET_PATH"]

    download_dataset(path, "freefield1010", _urls)

    # load labels: first column is the file id, second column the label
    labels = np.loadtxt(
        path + "freefield1010/ff1010bird_metadata.csv",
        delimiter=",",
        skiprows=1,
        dtype="int32",
    )

    # init. the data array
    N = labels.shape[0]
    wavs = np.empty((N, 441000), dtype="float32")

    # load wavs; the context manager releases the archive handle afterwards
    with zipfile.ZipFile(path + "freefield1010/ff1010bird_wav.zip") as archive:
        for i, file_id in tqdm(enumerate(labels[:, 0]), ascii=True, total=N):
            raw = archive.read("wav/" + str(file_id) + ".wav")
            wavs[i] = wav_read(io.BytesIO(raw))[1].astype("float32")

    return {"wavs": wavs, "labels": labels[:, 1]}
Example #27
0
    def __init__(
        self,
        filepath_or_array,
    ):
        """Build the object from a file path, another instance, or samples.

        Parameters
        ----------
        filepath_or_array:
            * ``str``: path handed to ``wav_read``; the samples are divided
              by ``2**15``.
            * ``wav`` instance: its ``sample_rate`` and ``data`` are reused
              (``data`` is not copied).
            * any non-ndarray of length 2: interpreted as
              ``(sample_rate, samples)``.
            * anything else: taken as the samples, with
              ``self.DEFAULT_SAMPLE_RATE`` as sample rate.

        Sets ``sample_rate``, ``data`` (always 2-D, samples by channels),
        ``shape``, ``size``, ``channels``, ``length`` (``size / sample_rate``)
        and ``time`` (one timestamp per sample).
        """
        if isinstance(filepath_or_array, str):
            self.sample_rate, self.data = wav_read(filepath_or_array)
            # NOTE(review): the 2**15 scale presumes 16-bit PCM input --
            # confirm against the files this is used with.
            self.data = self.data / (2.**15.)
        elif isinstance(filepath_or_array, wav):
            self.sample_rate, self.data = filepath_or_array.sample_rate, filepath_or_array.data
        elif len(filepath_or_array) == 2 and not isinstance(
                filepath_or_array, np.ndarray):
            self.sample_rate, self.data = filepath_or_array[0], np.asarray(
                filepath_or_array[1])
        else:
            self.sample_rate, self.data = self.DEFAULT_SAMPLE_RATE, np.asarray(
                filepath_or_array)

        # Single-channel input becomes a column so data is always 2-D.
        if len(self.data.shape) == 1:
            self.data = self.data.reshape(self.data.size, 1)

        self.shape = self.data.shape
        self.size, self.channels = self.shape
        self.length = float(self.size / self.sample_rate)
        # Build the axis from integer sample indices: np.arange with a float
        # stop/step can emit one extra element through rounding (11 samples
        # at rate 10 gave 12 timestamps), leaving time longer than data.
        self.time = np.arange(self.size) * (1.0 / float(self.sample_rate))
Example #28
0
File: esc.py Project: SymJAX/SymJAX
def load(path=None):
    """ESC-10/50: Environmental Sound Classification

    https://github.com/karolpiczak/ESC-50#download

    The ESC-50 dataset is a labeled collection of 2000 environmental audio
    recordings suitable for benchmarking methods of environmental sound
    classification.

    The dataset consists of 5-second-long recordings organized into 50
    semantical classes (with 40 examples per class) loosely arranged into 5
    major categories:
        Animals
        Natural soundscapes & water sounds
        Human, non-speech sounds
        Interior/domestic sounds
        Exterior/urban noises

    Clips in this dataset have been manually extracted from public field
    recordings gathered by the Freesound.org project. The dataset has been
    prearranged into 5 folds for comparable cross-validation, making sure
    that fragments from the same original source file are contained in a
    single fold.


    Parameters
    ----------

    path: str (optional)
        default $DATASET_PATH, the path to look for the data and
        where the data will be downloaded if not present. It is
        concatenated as-is with "esc50/master.zip".

    Returns
    -------

    data: dict
        a dictionary with the following entries, all aligned on the row
        order of the ``esc50.csv`` metadata file:

        "wavs": array
            the wavs as a numpy array (matrix) with first dimension the data
            and second dimension time; shorter clips are zero-padded
            symmetrically up to the longest one

        "fine_labels": array
            the labels of the final classes (50 different ones) as an
            integer vector

        "coarse_labels": array
            the labels of the classes big category (5 of them)

        "folds": array
            the fold as an integer from 1 to 5 specifying how to split the
            data; one should not split a fold into train and test as it
            would make the same recording (but different subparts) be
            present in train and test, biasing optimistically the results.

        "esc10": array
            the boolean vector specifying if the corresponding datum (wav,
            label, ...) is in the ESC-10 dataset or not. That is, to load
            the ESC-10 dataset simply load ESC-50 and use this boolean
            vector to extract only the ESC-10 data.
    """

    if path is None:
        path = os.environ["DATASET_PATH"]

    download_dataset(path, _dataset, _urls, _baseurl)

    # The context manager releases the archive handle once all is read.
    with zipfile.ZipFile(path + "esc50/master.zip") as f:
        meta = np.loadtxt(
            io.BytesIO(f.read("ESC-50-master/meta/esc50.csv")),
            delimiter=",",
            skiprows=1,
            dtype="str",
        )
        filenames = list(meta[:, 0])
        folds = meta[:, 1].astype("int32")
        fine_labels = meta[:, 2].astype("int32")
        categories = meta[:, 3]
        esc10 = meta[:, 4] == "True"
        coarse_labels = np.array([esc.fine_to_coarse[c] for c in categories])
        coarse_labels = coarse_labels.astype("int32")

        wavs = list()
        # order[i] is the metadata row that the i-th wav read belongs to
        order = list()
        N = 0
        for filename in tqdm(f.namelist(), ascii=True):
            if ".wav" not in filename:
                continue
            byt = io.BytesIO(f.read(filename))
            wavs.append(wav_read(byt)[1].astype("float32"))
            order.append(filenames.index(filename.split("/")[-1]))
            N = max(N, len(wavs[-1]))

    # Centre every clip in a zero row of the longest length, stored at the
    # row given by the metadata file rather than by the archive order.
    all_wavs = np.zeros((len(wavs), N))
    for row, wav in zip(order, wavs):
        left = (N - len(wav)) // 2
        all_wavs[row, left:left + len(wav)] = wav

    data = {
        "wavs": all_wavs,
        "fine_labels": fine_labels,
        "coarse_labels": coarse_labels,
        "folds": folds,
        "esc10": esc10,
    }
    return data
Example #29
0
    def load(path=None):
        """Load the FSDKaggle2018 train and test clips with their metadata.

        Parameters
        ----------

        path: str (optional)
            default $DATASET_PATH, the path to look for the data and
            where the data will be downloaded if not present. It is
            concatenated as-is with the 'FSDKaggle2018/...' file names.

        Returns
        -------

        dataset: dict
            a dictionary of lists; the train lists are aligned with each
            other and so are the test lists:

            'wavs_train' / 'wavs_test': the float32 waveforms, one array
            per clip, in archive order

            'labels_train', 'verified_train', 'fsid_train': columns 1, 2
            and 3 of ``train_post_competition.csv`` for each train clip,
            as strings

            'labels_test', 'usage_test', 'fsid_test': columns 1, 2 and 3 of
            ``test_post_competition_scoring_clips.csv`` for each test clip,
            as strings
        """

        def _read_wavs(archive_name, desc):
            """Return (wavs, names) for every .wav entry of a zip archive."""
            wavs, names = [], []
            with zipfile.ZipFile(path + archive_name) as archive:
                for filename in tqdm(archive.namelist(),
                                     ascii=True,
                                     desc=desc):
                    if '.wav' not in filename:
                        continue
                    byt = io.BytesIO(archive.read(filename))
                    wavs.append(wav_read(byt)[1].astype('float32'))
                    names.append(filename.split('/')[-1])
            return wavs, names

        def _columns_for(meta, names):
            """Return columns 1, 2, 3 of the metadata row matching each name."""
            filenames = list(meta[:, 0])
            rows = [meta[filenames.index(name)] for name in names]
            return ([row[1] for row in rows],
                    [row[2] for row in rows],
                    [row[3] for row in rows])

        if path is None:
            path = os.environ['DATASET_PATH']
        FSDKaggle2018.download(path)

        wavs_train, names_train = _read_wavs('FSDKaggle2018/audio_train.zip',
                                             'Loading train set')
        wavs_test, names_test = _read_wavs('FSDKaggle2018/audio_test.zip',
                                           'Loading test set')

        with zipfile.ZipFile(path + 'FSDKaggle2018/meta.zip') as archive:
            meta_train = np.loadtxt(io.BytesIO(
                archive.read(
                    'FSDKaggle2018.meta/train_post_competition.csv')),
                                    delimiter=',',
                                    skiprows=1,
                                    dtype='str')
            meta_test = np.loadtxt(io.BytesIO(
                archive.read(
                    'FSDKaggle2018.meta/test_post_competition_scoring_clips.csv')),
                                   delimiter=',',
                                   skiprows=1,
                                   dtype='str')

        labels_train, verified, fsid_train = _columns_for(meta_train,
                                                          names_train)
        labels_test, usage, fsid_test = _columns_for(meta_test, names_test)

        dataset = {
            'wavs_train': wavs_train,
            'labels_train': labels_train,
            'verified_train': verified,
            'fsid_train': fsid_train,
            'wavs_test': wavs_test,
            'labels_test': labels_test,
            'usage_test': usage,
            'fsid_test': fsid_test
        }
        return dataset
Example #30
0
def _pack_wavs(wavs):
    """Turn a list of waveform arrays into a single numpy array.

    Equally-shaped clips are stacked into a regular array. Clips of
    differing shapes go into a 1-D object array (one clip per entry),
    because recent NumPy versions refuse to build an array from ragged
    input unless the object dtype is requested explicitly.
    """
    if len({wav.shape for wav in wavs}) <= 1:
        return np.array(wavs)
    packed = np.empty(len(wavs), dtype=object)
    for i, wav in enumerate(wavs):
        packed[i] = wav
    return packed


def load(path=None):
    """music instrument classification

    ref https://zenodo.org/record/1290750#.WzCwSRyxXMU

    This dataset includes musical audio excerpts with annotations of the
    predominant instrument(s) present. It was used for the evaluation in the
    following article:

    Bosch, J. J., Janer, J., Fuhrmann, F., & Herrera, P. “A Comparison of Sound
    Segregation Techniques for Predominant Instrument Recognition in Musical
    Audio Signals”, in Proc. ISMIR (pp. 559-564), 2012

    Please Acknowledge IRMAS in Academic Research

    IRMAS is intended to be used for training and testing methods for the
    automatic recognition of predominant instruments in musical audio. The
    instruments considered are: cello, clarinet, flute, acoustic guitar,
    electric guitar, organ, piano, saxophone, trumpet, violin, and human singing
    voice. This dataset is derived from the one compiled by Ferdinand Fuhrmann
    in his PhD thesis, with the difference that we provide audio data in stereo
    format, the annotations in the testing dataset are limited to specific
    pitched instruments, and there is a different amount and length of excerpts.

    Parameters
    ----------

    path: str (optional)
        default $DATASET_PATH, the path to look for the data and where the
        data will be downloaded if not present. It is concatenated as-is
        with the "irmas/..." file names.

    Returns
    -------

    data: dict
        "train_set/wavs": array of the float32 training clips;
        "train_set/labels": list with the parent folder name of each
        training clip; "test_wavs": array of the float32 test clips (an
        object array holding one clip per entry when the clips differ in
        shape); "test_labels": list with the content of the ``.txt``
        annotation file sitting next to each test clip.
    """
    if path is None:
        path = os.environ["DATASET_PATH"]

    download_dataset(path, "irmas", _urls)

    t0 = time.time()

    train_wavs = list()
    train_labels = list()
    test_wavs = list()
    test_labels = list()

    # loading the training set; the label is the clip's parent folder
    with zipfile.ZipFile(path + "irmas/IRMAS-TrainingData.zip") as f:
        for filename in tqdm(f.namelist(), ascii=True):
            if ".wav" not in filename:
                continue
            byt = io.BytesIO(f.read(filename))
            train_wavs.append(wav_read(byt)[1].astype("float32"))
            train_labels.append(filename.split("/")[-2])

    # loading the test set, split across three archives
    base = "irmas/IRMAS-TestingData-Part{}.zip"
    for part in ["1", "2", "3"]:
        with zipfile.ZipFile(path + base.format(part)) as f:
            for filename in tqdm(f.namelist(),
                                 ascii=True,
                                 desc="Test data {}/3".format(part)):
                if ".wav" not in filename:
                    continue

                byt = io.BytesIO(f.read(filename))
                test_wavs.append(wav_read(byt)[1].astype("float32"))

                # annotations live in a .txt file named like the clip
                byt = io.BytesIO(f.read(filename.replace(".wav", ".txt")))
                test_labels.append(np.loadtxt(byt, dtype="str"))

    data = {
        "train_set/wavs": _pack_wavs(train_wavs),
        "train_set/labels": train_labels,
        "test_wavs": _pack_wavs(test_wavs),
        "test_labels": test_labels,
    }

    print("Dataset IRMAS loaded in {0:.2f}s.".format(time.time() - t0))
    return data
Example #31
0
def to_wav(mp4_path):
    """Convert the file at ``mp4_path`` to WAV with ffmpeg and read it back.

    The WAV file is written to a temporary directory that is removed before
    returning. The result is whatever ``wav_read`` returns for that file.
    Raises ``CalledProcessError`` when ffmpeg exits with a non-zero status.
    """
    with TemporaryDirectory() as scratch_dir:
        target = path.join(scratch_dir, "out.wav")
        # "-v 0" silences ffmpeg's console output
        command = ["ffmpeg", "-v", "0", "-i", mp4_path, target]
        check_call(command)
        decoded = wav_read(target)
    return decoded