Example #1
0
    def __iter__(self):
        """Yield one AudioMetaData per non-JSON regular file in the archive.

        The archive path is obtained from ``ensure_local_file(self._url,
        self.path)`` and scanned twice: the first pass loads the ``*.json``
        member (if several match, the last one wins), the second pass yields
        an item for every other regular file.  Each item's keyword metadata
        is looked up in the JSON document under the member's base file name
        with its extension removed.

        Raises:
            ValueError: if a data member is reached but the archive did not
                contain any ``*.json`` member to look its metadata up in.
            KeyError: if the JSON document has no entry for a member's id.
        """
        local_data = ensure_local_file(self._url, self.path)

        # Pass 1: the metadata must be fully loaded before any item can be
        # yielded, and the JSON member may appear anywhere in the archive.
        json_data = None
        with tarfile.open(local_data) as tar:
            for info in tar:
                if fnmatch.fnmatch(info.name, '*.json'):
                    flo = tar.extractfile(member=info)
                    json_data = json.load(flo)

        # Pass 2: everything that is a regular file and not JSON is treated
        # as audio data.
        with tarfile.open(local_data) as tar:
            for info in tar:
                if fnmatch.fnmatch(info.name, '*.json'):
                    continue
                if not info.isfile():
                    continue
                if json_data is None:
                    # Fail with a clear message instead of the opaque
                    # "'NoneType' object is not subscriptable" below.
                    raise ValueError(
                        'no *.json metadata member found in {path}'
                        .format(path=local_data))
                _id = os.path.splitext(os.path.basename(info.name))[0]
                wav_flo = tar.extractfile(member=info)
                url = \
                    'https://magenta.tensorflow.org/datasets/nsynth/{_id}' \
                        .format(_id=_id)
                pdl = PreDownload(wav_flo.read(), url)
                yield AudioMetaData(
                    uri=pdl,
                    web_url='https://magenta.tensorflow.org/datasets/nsynth',
                    **json_data[_id])
Example #2
0
 def __iter__(self):
     """Yield an AudioMetaData for every wav link on the beats page.

     Fetches ``http://phatdrumloops.com/beats.php``, finds every
     ``href="/audio/wav/....wav"`` attribute in the response body and
     yields an AudioMetaData whose ``uri`` is an unsent ``requests.Request``
     for the absolute wav URL, combined with ``self.attrs`` as keyword
     arguments.
     """
     resp = requests.get('http://phatdrumloops.com/beats.php')
     # Raw string so the regex escapes (``\.``) are not interpreted as
     # (invalid) string escape sequences; the pattern itself is unchanged.
     pattern = re.compile(r'href="(?P<uri>/audio/wav/[^\.]+\.wav)"')
     # NOTE(review): resp.content is the raw response body; on Python 3 that
     # is bytes and would not match a str pattern -- confirm target version.
     for m in pattern.finditer(resp.content):
         url = urlparse.urljoin('http://phatdrumloops.com', m.group('uri'))
         request = requests.Request(method='GET',
                                    url=url,
                                    headers={'Range': 'bytes=0-'})
         yield AudioMetaData(uri=request, **self.attrs)
Example #3
0
    def _freesound_to_audio_metadata(self, data):
        """Convert a sound record mapping into an AudioMetaData.

        ``data`` must provide the keys ``previews`` (itself containing
        ``preview-hq-ogg``), ``username``, ``id``, ``samplerate``,
        ``channels``, ``license``, ``description`` and ``tags``.

        The returned object's ``uri`` is an unsent GET request for the
        preview URL carrying ``self.api_key`` as the ``token`` query
        parameter; ``web_url`` is the sound's page on freesound.org.
        """
        preview_url = data['previews']['preview-hq-ogg']
        query = {'token': self.api_key}
        request = requests.Request(
            method='GET', url=preview_url, params=query)

        page_template = 'https://freesound.org/people/{username}/sounds/{id}/'
        web_url = page_template.format(**data)

        samplerate = data['samplerate']
        channels = data['channels']
        licensing = data['license']
        description = data['description']
        tags = data['tags']

        return AudioMetaData(
            uri=request,
            samplerate=samplerate,
            channels=channels,
            licensing=licensing,
            description=description,
            tags=tags,
            web_url=web_url)
Example #4
0
    def test_can_add_additional_data_to_index(self):
        """Extra per-document data configured on the index is returned
        alongside search results.

        The index is built with a ``web_url`` extractor that reads the value
        from each document's metadata; after processing one synthesized
        signal, the first random-search hit must carry that document's id
        and its ``web_url``.
        """
        expected_url = 'https://example.com'

        event_log_settings = self._settings_with_event_log()
        Model = self._model(slice_size=128, settings=event_log_settings)

        index = self._index(
            Model,
            Model.sliced,
            web_url=lambda doc, ts: doc.meta['web_url'])

        # Five seconds of three stacked sine tones at 11025 Hz.
        synth = SineSynthesizer(SR11025())
        signal = synth.synthesize(Seconds(5), [220, 440, 880])

        meta = AudioMetaData(uri=signal.encode(), web_url=expected_url)
        _id = Model.process(meta=meta)
        # Make sure the processed document is indexed before searching.
        index._synchronously_process_events()

        hits = list(index.random_search(n_results=5))
        first_hit = hits[0]
        result_id, _ts, extra_data = first_hit

        self.assertEqual(_id, result_id)
        self.assertEqual(expected_url, extra_data['web_url'])
Example #5
0
    def __iter__(self):
        """Yield one AudioMetaData per file in the ``train_data`` directory.

        The metadata CSV (located via ``ensure_local_file(self._metadata,
        self.path)``) is read into a dict keyed by each row's ``id`` column.
        Every file name in ``<self.path>/train_data`` is then matched to a
        row by its name without extension, loaded with
        ``AudioSamples.from_file``, re-encoded, and yielded with the row's
        columns as keyword metadata plus ``samplerate=int(self._samplerate)``.

        Raises:
            KeyError: if a file in ``train_data`` has no matching ``id`` row
                in the metadata CSV.
        """
        local_metadata = ensure_local_file(self._metadata, self.path)

        # NOTE(review): binary mode is what Python 2's csv module expects;
        # Python 3's csv module needs a text-mode file -- confirm the target
        # interpreter before changing the mode.
        with open(local_metadata, 'rb') as f:
            metadata = {row['id']: row for row in csv.DictReader(f)}

        train_audio_path = os.path.join(self.path, 'train_data')

        for filename in os.listdir(train_audio_path):
            full_path = os.path.join(train_audio_path, filename)
            # The file name without its extension is the metadata id.
            _id = os.path.splitext(filename)[0]
            url = \
                'https://homes.cs.washington.edu/~thickstn/media/{_id}'\
                    .format(_id=_id)
            meta = metadata[_id]
            samples = AudioSamples.from_file(full_path)
            uri = PreDownload(samples.encode().read(), url)
            yield AudioMetaData(uri=uri,
                                samplerate=int(self._samplerate),
                                **meta)
Example #6
0
 def __iter__(self):
     """Yield two AudioMetaData items backed by empty PreDownload payloads,
     one for each of two fixed example.com URLs, in order."""
     urls = ('http://example.com/3', 'http://example.com/4')
     for url in urls:
         yield AudioMetaData(uri=PreDownload('', url))