def __iter__(self):
    """Yield an AudioMetaData for every non-JSON file in the tar archive.

    The archive is obtained through ``ensure_local_file(self._url,
    self.path)`` and read twice: once to load the JSON metadata member,
    and once to stream the remaining file members. Each yielded item wraps
    the member's bytes in a ``PreDownload`` and is populated with the
    metadata entry stored under the member's base name (without extension).
    """
    archive_path = ensure_local_file(self._url, self.path)

    # Pass 1: load the metadata. If several members match '*.json', the
    # last one encountered wins.
    metadata_by_id = None
    with tarfile.open(archive_path) as archive:
        for member in archive:
            if not fnmatch.fnmatch(member.name, '*.json'):
                continue
            metadata_by_id = json.load(archive.extractfile(member=member))

    # Pass 2: emit one item per regular, non-JSON member.
    # NOTE: when no '*.json' member exists, metadata_by_id is still None
    # here and the lookup below raises TypeError.
    with tarfile.open(archive_path) as archive:
        for member in archive:
            is_json = fnmatch.fnmatch(member.name, '*.json')
            if is_json or not member.isfile():
                continue

            # The id is the file name stripped of directories and extension.
            _id = os.path.splitext(os.path.basename(member.name))[0]
            audio_flo = archive.extractfile(member=member)
            url = 'https://magenta.tensorflow.org/datasets/nsynth/{_id}' \
                .format(_id=_id)
            yield AudioMetaData(
                uri=PreDownload(audio_flo.read(), url),
                web_url='https://magenta.tensorflow.org/datasets/nsynth',
                **metadata_by_id[_id])
def __iter__(self):
    """Yield an AudioMetaData for each wav link found on the beats page.

    The page at ``http://phatdrumloops.com/beats.php`` is fetched and
    scanned for ``href`` attributes pointing at ``/audio/wav/*.wav``. Each
    match is resolved against the site root and wrapped in a
    ``requests.Request`` (constructed here, not sent) carrying a
    ``Range: bytes=0-`` header. ``self.attrs`` is forwarded as extra
    keyword arguments to every ``AudioMetaData``.
    """
    resp = requests.get('http://phatdrumloops.com/beats.php')

    # Raw string so that ``\.`` reaches the regex engine without relying on
    # Python passing unknown string escapes through unchanged (an invalid
    # escape sequence warns on modern interpreters). The compiled pattern
    # is identical to the previous one.
    pattern = re.compile(r'href="(?P<uri>/audio/wav/[^\.]+\.wav)"')

    # NOTE(review): resp.content is matched against a str pattern; on
    # Python 3 resp.content is bytes — confirm the target interpreter.
    for m in pattern.finditer(resp.content):
        url = urlparse.urljoin('http://phatdrumloops.com', m.group('uri'))
        request = requests.Request(
            method='GET',
            url=url,
            headers={'Range': 'bytes=0-'})
        yield AudioMetaData(uri=request, **self.attrs)
def _freesound_to_audio_metadata(self, data):
    """Build an AudioMetaData from a freesound result mapping.

    ``data`` must provide the keys read below: ``previews`` (containing
    ``preview-hq-ogg``), ``username``, ``id``, ``samplerate``,
    ``channels``, ``license``, ``description`` and ``tags``.

    Returns an ``AudioMetaData`` whose ``uri`` is a ``requests.Request``
    for the preview URL with ``self.api_key`` passed as the ``token``
    query parameter.
    """
    preview_request = requests.Request(
        method='GET',
        url=data['previews']['preview-hq-ogg'],
        params={'token': self.api_key})

    # The template pulls ``username`` and ``id`` out of ``data``.
    page_template = 'https://freesound.org/people/{username}/sounds/{id}/'
    sound_page = page_template.format(**data)

    return AudioMetaData(
        uri=preview_request,
        samplerate=data['samplerate'],
        channels=data['channels'],
        licensing=data['license'],
        description=data['description'],
        tags=data['tags'],
        web_url=sound_page)
def test_can_add_additional_data_to_index(self):
    # Verifies that an extra per-document field (``web_url``), computed by
    # a callable handed to the index, comes back with search results.
    expected_url = 'https://example.com'

    settings = self._settings_with_event_log()
    Model = self._model(slice_size=128, settings=settings)
    index = self._index(
        Model,
        Model.sliced,
        web_url=lambda doc, ts: doc.meta['web_url'])

    # Process a single synthesized document carrying the expected URL.
    synthesizer = SineSynthesizer(SR11025())
    signal = synthesizer.synthesize(Seconds(5), [220, 440, 880])
    meta = AudioMetaData(uri=signal.encode(), web_url=expected_url)
    _id = Model.process(meta=meta)

    # Let the index consume pending events before searching.
    index._synchronously_process_events()

    first_hit = list(index.random_search(n_results=5))[0]
    result_id, _ts, extra_data = first_hit

    self.assertEqual(_id, result_id)
    self.assertEqual(expected_url, extra_data['web_url'])
def __iter__(self):
    """Yield an AudioMetaData for every file in the ``train_data`` folder.

    Metadata rows are read from the CSV obtained through
    ``ensure_local_file(self._metadata, self.path)`` and keyed by their
    ``id`` column. Every entry in ``<self.path>/train_data`` is loaded
    with ``AudioSamples.from_file``, re-encoded, and wrapped in a
    ``PreDownload``; the CSV row whose id equals the file name (without
    extension) is forwarded as keyword arguments.

    Raises ``KeyError`` if a file name has no matching CSV row.
    """
    metadata_csv = ensure_local_file(self._metadata, self.path)

    # NOTE(review): binary mode suits the Python 2 csv module; Python 3's
    # csv expects a text-mode file — confirm the target interpreter.
    with open(metadata_csv, 'rb') as csv_file:
        rows_by_id = {row['id']: row for row in csv.DictReader(csv_file)}

    audio_dir = os.path.join(self.path, 'train_data')
    for filename in os.listdir(audio_dir):
        _id = os.path.splitext(filename)[0]
        meta = rows_by_id[_id]

        samples = AudioSamples.from_file(os.path.join(audio_dir, filename))
        url = 'https://homes.cs.washington.edu/~thickstn/media/{_id}' \
            .format(_id=_id)
        encoded = samples.encode().read()

        yield AudioMetaData(
            uri=PreDownload(encoded, url),
            samplerate=int(self._samplerate),
            **meta)
def __iter__(self):
    """Yield two AudioMetaData items with fixed example.com URLs.

    Each item's ``uri`` is a ``PreDownload`` built from an empty string
    and one of the two URLs, in the order listed.
    """
    urls = ('http://example.com/3', 'http://example.com/4')
    for url in urls:
        yield AudioMetaData(uri=PreDownload('', url))