def get_audio_data(self, idx):
    """
    Return audio metadata for one dataset item without decoding the audio.

    N.B. see essentia.standard.MetadataReader!

    Returns
    -------
    list of tuples : one tuple per source file, each containing
        int : duration in seconds
        int : bitrate (kb/s)
        int : sample rate
        int : number of channels
    """
    source_files = self.paths[idx][0]
    collected = []
    for source in source_files:
        full_path = joinpath(self.install_dir, source)
        # the last 4 entries of the reader output are duration, bitrate,
        # sample rate and channel count
        info = MetadataReader(filename=str(full_path), filterMetadata=True)()
        collected.append(info[-4:])
    return collected
def get_song_features(analysis, filename):
    """
    Collect tag metadata and Echo Nest descriptors for one track.

    Parameters
    ----------
    analysis : dict
        Analysis result holding a 'desc' mapping with 'en_*' descriptor keys.
    filename : str
        Path of the audio file to read tags from.

    Returns
    -------
    tuple : (title, artist, year, danceability, speechiness, valence,
        acousticness, energy)
    """
    tags = MetadataReader(filename=filename)()
    # tag tuple positions: 0 = title, 1 = artist, 6 = year
    title, artist, year = tags[0], tags[1], tags[6]
    desc = analysis['desc']
    return (
        title,
        artist,
        year,
        desc['en_danceability'],
        desc['en_speechiness'],
        desc['en_valence'],
        desc['en_acousticness'],
        desc['en_energy'],
    )
def open_audio(audio_fn: Union[str, pathlib.Path]) -> Tuple[np.ndarray, int]:
    """
    Load the audio file at `audio_fn` and return it as a numpy array,
    one row per channel (only Mono supported for now), together with the
    original sample rate.

    Raises
    ------
    RuntimeError
        If the file carries no sample-rate metadata.
    """
    fn = str(audio_fn)
    # second-to-last metadata field is the sample rate; 0 means "unknown"
    sample_rate = MetadataReader(filename=fn, filterMetadata=True)()[-2]
    if not sample_rate:
        raise RuntimeError("No sample rate metadata in file " + fn)
    samples = Loader(filename=fn, sampleRate=sample_rate, endTime=1e+07)()
    return samples, sample_rate
# Scan an input directory for audio files and collect per-track metadata
# into an essentia Pool, saved as JSON via YamlOutput.
# NOTE(review): the first two lines look like the tail of an argument-count
# guard whose `if` is outside this chunk — confirm against the full file.
print("usage: %s <input-directory> <result.json>" % sys.argv[0])
sys.exit()
result = Pool()  # accumulates one namespaced metadata group per track
files = [f for f in find_files(indir, FILE_EXT)]
print('Found %d audio files (%s)' % (len(files), '/'.join(FILE_EXT)))
i = 0
for filename in files:
    i += 1
    print('Extracting metadata: %s' % filename)
    # each track gets its own pool namespace, e.g. "track_3.metadata"
    namespace = 'track_' + str(i)
    try:
        # entries [7:] of the reader result are: tag pool, duration,
        # bitrate, sample rate, channel count
        meta = MetadataReader(filename=filename, failOnError=True,
                              tagPoolName=namespace + '.metadata')()
        pool_meta, duration, bitrate, samplerate, channels = meta[7:]
        pool_meta.set(namespace + ".file_path", os.path.relpath(filename))
        pool_meta.set(namespace + ".duration", duration)
        pool_meta.set(namespace + ".bit_rate", bitrate)
        pool_meta.set(namespace + ".sample_rate", samplerate)
        pool_meta.set(namespace + ".channels", channels)
        result.merge(pool_meta)
    except Exception as e:
        # best-effort: report the failing file and keep going
        print(str(e))
print("Saving results to %s" % result_file)
# NOTE(review): the YamlOutput(...) call below is cut off in the visible
# source — its remaining arguments and closing paren are outside this chunk.
YamlOutput(filename=result_file, format='json', doubleCheck=True,
def create_chart_dir(
        artist, title,
        audio_fp, norm, analyzers,
        sp_model, sp_batch_size,
        diffs, ss_model, idx_to_label,
        out_dir, delete_audio=False):
    # Generate a StepMania-style chart directory for one audio file:
    # extract features, place steps with `sp_model`, select arrows with
    # `ss_model`, render chart text for each requested difficulty, and
    # write the .sm file plus a copy of the audio into `out_dir`.
    # Python 2 code (print statements, xrange). Relies on module globals:
    # sess, _HZ, _DT, _SUBDIV, _DIFFS, _BPM, _CHART_TEMPL, _TEMPL,
    # _DIFF_TO_COARSE_FINE_AND_THRESHOLD. Returns True on success; raises
    # CreateChartException on any failure.
    if not artist or not title:
        # fall back to embedded tags, then to placeholder strings
        print 'Extracting metadata from {}'.format(audio_fp)
        meta_reader = MetadataReader(filename=audio_fp)
        metadata = meta_reader()
        if not artist:
            artist = metadata[1]
            if not artist:
                artist = 'Unknown Artist'
        if not title:
            title = metadata[0]
            if not title:
                title = 'Unknown Title'

    print 'Loading {} - {}'.format(artist, title)
    try:
        song_feats = extract_mel_feats(audio_fp, analyzers, nhop=441)
    except:
        raise CreateChartException('Invalid audio file: {}'.format(audio_fp))
    # per-feature normalization: norm = (mean, std) — presumably computed
    # at training time; TODO confirm
    song_feats -= norm[0]
    song_feats /= norm[1]
    song_len_sec = song_feats.shape[0] / _HZ
    print 'Processed {} minutes of features'.format(song_len_sec / 60.0)

    diff_chart_txts = []
    for diff in diffs:
        try:
            coarse, fine, threshold = _DIFF_TO_COARSE_FINE_AND_THRESHOLD[diff]
        except KeyError:
            raise CreateChartException('Invalid difficulty: {}'.format(diff))
        # one-hot difficulty vector fed alongside the audio features
        feats_other = np.zeros((sp_batch_size, 1, 5), dtype=np.float32)
        feats_other[:, :, coarse] = 1.0

        print 'Computing step placement scores'
        feats_audio = np.zeros((sp_batch_size, 1, 15, 80, 3), dtype=np.float32)
        predictions = []
        # score every frame in fixed-size batches; the last batch may read
        # past the end of song_feats (make_onset_feature_context handles the
        # context window) and the excess is trimmed after concatenation
        for start in xrange(0, song_feats.shape[0], sp_batch_size):
            for i, frame_idx in enumerate(range(start, start + sp_batch_size)):
                feats_audio[i] = make_onset_feature_context(song_feats, frame_idx, 7)
            feed_dict = {
                sp_model.feats_audio: feats_audio,
                sp_model.feats_other: feats_other
            }
            prediction = sess.run(sp_model.prediction, feed_dict=feed_dict)[:, 0]
            predictions.append(prediction)
        predictions = np.concatenate(predictions)[:song_feats.shape[0]]
        print predictions.shape

        print 'Peak picking'
        # smooth scores, take local maxima, keep those above the
        # difficulty-specific threshold (thresholded on the raw scores)
        predictions_smoothed = np.convolve(predictions, np.hamming(5), 'same')
        maxima = argrelextrema(predictions_smoothed, np.greater_equal, order=1)[0]
        placed_times = []
        for i in maxima:
            t = float(i) * _DT
            if predictions[i] >= threshold:
                placed_times.append(t)
        print 'Found {} peaks, density {} steps per second'.format(len(placed_times), len(placed_times) / song_len_sec)

        print 'Performing step selection'
        state = sess.run(ss_model.initial_state)
        step_prev = '<-1>'  # sentinel "no previous step" symbol
        # pad with duplicated endpoints so every step has a prev/next delta
        times_arr = [placed_times[0]] + placed_times + [placed_times[-1]]
        selected_steps = []
        for i in xrange(1, len(times_arr) - 1):
            dt_prev, dt_next = times_arr[i] - times_arr[i-1], times_arr[i+1] - times_arr[i]
            feed_dict = {
                ss_model.syms: np.array([[ss_model.arrow_to_encoding(step_prev, 'bagofarrows')]], dtype=np.float32),
                ss_model.feats_other: np.array([[[dt_prev, dt_next]]], dtype=np.float32),
                ss_model.feats_audio: np.zeros((1, 1, 1, 0, 0), dtype=np.float32),
                ss_model.initial_state: state
            }
            scores, state = sess.run([ss_model.scores, ss_model.final_state], feed_dict=feed_dict)
            # resample until a real step label comes up — indices 0 and 1
            # are presumably special (padding/empty) symbols; TODO confirm
            step_idx = 0
            while step_idx <= 1:
                step_idx = weighted_pick(scores)
            step = idx_to_label[step_idx]
            selected_steps.append(step)
            step_prev = step
        assert len(placed_times) == len(selected_steps)

        print 'Creating chart text'
        # quantize step times onto the _HZ grid and pad the final measure
        time_to_step = {int(round(t * _HZ)): step for t, step in zip(placed_times, selected_steps)}
        max_subdiv = max(time_to_step.keys())
        if max_subdiv % _SUBDIV != 0:
            max_subdiv += _SUBDIV - (max_subdiv % _SUBDIV)
        full_steps = [time_to_step.get(i, '0000') for i in xrange(max_subdiv)]
        measures = [full_steps[i:i+_SUBDIV] for i in xrange(0, max_subdiv, _SUBDIV)]
        measures_txt = '\n,\n'.join(['\n'.join(measure) for measure in measures])
        chart_txt = _CHART_TEMPL.format(
            ccoarse=_DIFFS[coarse],
            cfine=fine,
            measures=measures_txt
        )
        diff_chart_txts.append(chart_txt)

    print 'Creating SM'
    out_dir_name = os.path.split(out_dir)[1]
    audio_out_name = out_dir_name + os.path.splitext(audio_fp)[1]
    sm_txt = _TEMPL.format(
        title=title,
        artist=artist,
        music_fp=audio_out_name,
        bpm=_BPM,
        charts='\n'.join(diff_chart_txts))

    print 'Saving to {}'.format(out_dir)
    try:
        os.mkdir(out_dir)
        audio_ext = os.path.splitext(audio_fp)[1]
        shutil.copyfile(audio_fp, os.path.join(out_dir, audio_out_name))
        with open(os.path.join(out_dir, out_dir_name + '.sm'), 'w') as f:
            f.write(sm_txt)
    except:
        raise CreateChartException('Error during output')

    if delete_audio:
        try:
            os.remove(audio_fp)
        except:
            raise CreateChartException('Error deleting audio')

    return True
def extract_melody(path=None, audio=None, sf=44100, quantise=False,
                   verbose=False, **kwargs):
    '''Automatically extract the musical note sequence from raw audio input
    of natural speech.

    Parameters
    ----------
    path : str, optional
        File path to audio file. If None then must supply `audio` (and
        associated `sf`). Default None.
    audio : array_like, optional
        Numpy array containing the audio sampled at sample frequency `sf`.
        Required if `path` is not provided. Default None.
    sf : {32000, 44100, 48000}, optional
        The sample frequency (in Hertz) of the audio array `audio`, if
        provided. Ignored if `path` is used. Default 44100.
    quantise : bool, optional
        Whether to quantise the note values to match those of MIDI notes
        (where A=440Hz). Default False.
    verbose : bool, optional
        Print details. Default False.
    **kwargs : optional
        Keyword arguments to pass onto `segment_notes()` and `get_notes()`.

    Returns
    -------
    ns : array
        Note start times (in seconds).
    nl : array
        Note lengths (in seconds).
    nv : array
        Note values (in Hertz).

    Raises
    ------
    ValueError
        If neither `path` nor `audio` is provided.
    ValueError
        If `path` cannot be found.
    '''
    # idiom fix: identity tests replace the old `type(x) != type(None)` form
    if path is not None:
        if not os.path.isfile(path):
            raise ValueError('Path {} not found.'.format(path))
        # field 10 of the metadata tuple is presumably the sample rate —
        # TODO confirm against essentia.standard.MetadataReader docs
        sf = MetadataReader(filename=path)()[10]
        audio = EqloudLoader(filename=path, sampleRate=sf)()
    elif audio is None:
        raise ValueError(
            'Must provide either filepath (`path`) or array of audio data (`audio`).'
        )
    if sf not in {32000, 44100, 48000}:
        raise ValueError(
            'Sample frequency `sf` must be in {32000, 44100, 48000}.')
    # Sound object as parsed by Praat
    sound = parselmouth.Sound(values=np.asarray(audio, dtype=np.float64),
                              sampling_frequency=sf)
    # Pitch and Intensity vectors
    p = sound.to_pitch_ac(time_step=0.01, octave_jump_cost=0.6)
    I = sound.to_intensity(minimum_pitch=50, time_step=0.01)
    end_time = sound.end_time
    #ts = np.linspace(0, end_time, len(p))
    if verbose:
        print('- Loaded audio: sf={}, length={:.2f} seconds'.format(
            sf, end_time))
    # segment the intensity/pitch tracks into syllable nuclei, then derive
    # per-note start/length/value from them
    _, _, nuclei = segment_notes(I, p, verbose=verbose, **kwargs)
    ns, nl, nv = get_notes(I, p, nuclei, verbose=verbose, **kwargs)
    if verbose:
        print('== Found {} notes.'.format(len(nv)))
    if quantise:
        if verbose:
            print('- quantising note values')
        nv = quantise_notes(nv)
    if verbose:
        print('\n== Done')
    return ns, nl, nv
# Walk a directory tree for .mp3 files and submit each to the Echo Nest
# analysis API. Python 2 script chunk; `api_key`, imports, and any use of
# `output_file`/`all_files`/`echonest_id` live outside this view.
echo_conf.ECHO_NEST_API_KEY = api_key
path = sys.argv[1]
output_file = sys.argv[2]
files = []
# find files that need to be analyzed
for dirpath, dirnames, filenames in os.walk(path):
    for filename in [f for f in filenames if f.endswith(".mp3")]:
        files.append(os.path.join(dirpath, filename))
all_files = []
count = 0
for filename in files:
    metadata_reader = MetadataReader(filename=filename)
    metadata = metadata_reader()
    # NOTE(review): indices 0/1/6 presumably map to title/artist/year —
    # confirm against essentia.standard.MetadataReader output order
    track_title = metadata[0]
    track_artist = metadata[1]
    track_year = metadata[6]
    # deliberate retry-forever loop: on any API failure, wait a minute
    # and try the same file again (Echo Nest rate limiting)
    while True:
        try:
            pytrack = echo_track.track_from_filename(filename)
            pytrack.get_analysis()
            break
        except:
            print "Error encountered"
            time.sleep(60)
    echonest_id = pytrack.id
    count += 1
    print "Files analyzed: ", count