Example 1
    def get_audio_data(self, idx):
        """
        Collect per-source audio metadata for item `idx` without decoding
        the full audio stream.

        N.B. see essentia.standard.MetadataReader!

        Returns
        -------

        list of tuples :
            one tuple per source file, each containing the following

        int :
            duration in seconds
        int :
            bitrate (kb/s)
        int :
            sample rate
        int :
            number of channels
        """
        collected = []
        for source_fn in self.paths[idx][0]:
            full_path = joinpath(self.install_dir, source_fn)
            # filterMetadata keeps the reader from returning raw tag noise;
            # the last 4 outputs are duration, bitrate, sample rate, channels.
            reader = MetadataReader(filename=str(full_path),
                                    filterMetadata=True)
            collected.append(reader()[-4:])
        return collected
Example 2
def get_song_features(analysis, filename):
    """Combine the file's tag metadata with the EchoNest-style descriptors
    stored in `analysis`, returning one flat tuple of song features."""
    tags = MetadataReader(filename=filename)()
    # Tag positions: 0 = title, 1 = artist, 6 = year.
    track_title = tags[0]
    track_artist = tags[1]
    track_year = tags[6]
    desc = analysis['desc']
    danceability = desc['en_danceability']
    speechiness = desc['en_speechiness']
    valence = desc['en_valence']
    acousticness = desc['en_acousticness']
    energy = desc['en_energy']
    return (track_title, track_artist, track_year, danceability,
            speechiness, valence, acousticness, energy)
Example 3
def open_audio(audio_fn: Union[str, pathlib.Path]) -> Tuple[np.ndarray, int]:
    """
    Open the audio file in `audio_fn` and return a numpy array containing it,
    one row for each channel (only Mono supported for now), together with the
    original sample rate.
    """
    path_str = str(audio_fn)

    # Read only the metadata first so we can load at the file's native rate;
    # the sample rate is the second-to-last field of the reader output.
    metadata = MetadataReader(filename=path_str, filterMetadata=True)()
    sample_rate = metadata[-2]
    if sample_rate == 0:
        raise RuntimeError("No sample rate metadata in file " + str(audio_fn))

    # endTime is a huge sentinel so the whole file is decoded.
    samples = Loader(filename=path_str,
                     sampleRate=sample_rate,
                     endTime=1e+07)()
    return samples, sample_rate
Example 4
    print("usage: %s <input-directory> <result.json>" % sys.argv[0])
    sys.exit()

# Pool accumulating the metadata of every successfully-read audio file.
result = Pool()
files = [f for f in find_files(indir, FILE_EXT)]

print('Found %d audio files (%s)' % (len(files), '/'.join(FILE_EXT)))

i = 0
for filename in files:
    i += 1
    print('Extracting metadata: %s' % filename)
    # One namespace per track so the per-file pools can be merged into
    # `result` without key collisions.
    namespace = 'track_' + str(i)
    try:
        # failOnError makes unreadable files raise instead of yielding
        # empty metadata; tagPoolName routes the tags into our namespace.
        meta = MetadataReader(filename=filename,
                              failOnError=True,
                              tagPoolName=namespace + '.metadata')()
        # Tail of the reader output: (tag pool, duration, bitrate,
        # sample rate, channels) — presumably essentia's fixed output
        # layout; TODO confirm against the installed essentia version.
        pool_meta, duration, bitrate, samplerate, channels = meta[7:]
        pool_meta.set(namespace + ".file_path", os.path.relpath(filename))
        pool_meta.set(namespace + ".duration", duration)
        pool_meta.set(namespace + ".bit_rate", bitrate)
        pool_meta.set(namespace + ".sample_rate", samplerate)
        pool_meta.set(namespace + ".channels", channels)
        result.merge(pool_meta)
    except Exception as e:
        # Report the failure but keep processing the remaining files.
        print(str(e))

print("Saving results to %s" % result_file)
YamlOutput(filename=result_file,
           format='json',
           doubleCheck=True,
Example 5
def create_chart_dir(
        artist, title,
        audio_fp,
        norm, analyzers,
        sp_model, sp_batch_size, diffs,
        ss_model, idx_to_label,
        out_dir, delete_audio=False):
    """Generate a StepMania (.sm) chart directory for a single audio file.

    Pipeline: fill in missing artist/title from the file's tags, extract
    and normalize mel features, then for each requested difficulty score
    step placements with `sp_model`, peak-pick step times, select arrow
    patterns with `ss_model`, and finally write the .sm file plus a copy
    of the audio into `out_dir`.

    Relies on module-level globals: `sess` (TF session), `_HZ`, `_DT`,
    `_SUBDIV`, `_BPM`, `_DIFFS`, `_CHART_TEMPL`, `_TEMPL`,
    `_DIFF_TO_COARSE_FINE_AND_THRESHOLD`.

    Returns True on success; raises CreateChartException on any failure.
    """
    # Fall back to the audio file's tags (tag 1 = artist, tag 0 = title),
    # then to placeholder strings, for any missing artist/title.
    if not artist or not title:
        print 'Extracting metadata from {}'.format(audio_fp)
        meta_reader = MetadataReader(filename=audio_fp)
        metadata = meta_reader()
        if not artist:
            artist = metadata[1]
        if not artist:
            artist = 'Unknown Artist'
        if not title:
            title = metadata[0]
        if not title:
            title = 'Unknown Title'

    print 'Loading {} - {}'.format(artist, title)
    try:
        song_feats = extract_mel_feats(audio_fp, analyzers, nhop=441)
    except:
        raise CreateChartException('Invalid audio file: {}'.format(audio_fp))
    # Normalize features with the precomputed (mean, std) pair in `norm`.
    song_feats -= norm[0]
    song_feats /= norm[1]
    song_len_sec = song_feats.shape[0] / _HZ
    print 'Processed {} minutes of features'.format(song_len_sec / 60.0)

    diff_chart_txts = []
    for diff in diffs:
        try:
            coarse, fine, threshold = _DIFF_TO_COARSE_FINE_AND_THRESHOLD[diff]
        except KeyError:
            raise CreateChartException('Invalid difficulty: {}'.format(diff))

        # One-hot encoding of the coarse difficulty, broadcast over the batch.
        feats_other = np.zeros((sp_batch_size, 1, 5), dtype=np.float32)
        feats_other[:, :, coarse] = 1.0

        print 'Computing step placement scores'
        # Batched sliding windows of audio context (±7 frames around each
        # frame); the final partial batch is padded past the song's end.
        feats_audio = np.zeros((sp_batch_size, 1, 15, 80, 3), dtype=np.float32)
        predictions = []
        for start in xrange(0, song_feats.shape[0], sp_batch_size):
            for i, frame_idx in enumerate(range(start, start + sp_batch_size)):
                feats_audio[i] = make_onset_feature_context(song_feats, frame_idx, 7)

            feed_dict = {
                sp_model.feats_audio: feats_audio,
                sp_model.feats_other: feats_other
            }

            prediction = sess.run(sp_model.prediction, feed_dict=feed_dict)[:, 0]
            predictions.append(prediction)
        # Trim the padding frames added by the last batch.
        predictions = np.concatenate(predictions)[:song_feats.shape[0]]
        print predictions.shape

        print 'Peak picking'
        # Smooth scores, find local maxima, keep those above the
        # difficulty-specific threshold as step placement times.
        predictions_smoothed = np.convolve(predictions, np.hamming(5), 'same')
        maxima = argrelextrema(predictions_smoothed, np.greater_equal, order=1)[0]
        placed_times = []
        for i in maxima:
            t = float(i) * _DT
            if predictions[i] >= threshold:
                placed_times.append(t)
        print 'Found {} peaks, density {} steps per second'.format(len(placed_times), len(placed_times) / song_len_sec)

        print 'Performing step selection'
        state = sess.run(ss_model.initial_state)
        step_prev = '<-1>'
        # Pad with duplicated first/last times so dt_prev/dt_next exist at
        # the boundaries.
        # NOTE(review): raises IndexError if peak picking found no steps
        # at all — confirm whether callers guard against empty charts.
        times_arr = [placed_times[0]] + placed_times + [placed_times[-1]]
        selected_steps = []
        for i in xrange(1, len(times_arr) - 1):
            dt_prev, dt_next = times_arr[i] - times_arr[i-1], times_arr[i+1] - times_arr[i]
            feed_dict = {
                ss_model.syms: np.array([[ss_model.arrow_to_encoding(step_prev, 'bagofarrows')]], dtype=np.float32),
                ss_model.feats_other: np.array([[[dt_prev, dt_next]]], dtype=np.float32),
                ss_model.feats_audio: np.zeros((1, 1, 1, 0, 0), dtype=np.float32),
                ss_model.initial_state: state
            }
            scores, state = sess.run([ss_model.scores, ss_model.final_state], feed_dict=feed_dict)

            # Resample until a real arrow index is drawn; indices 0 and 1
            # are presumably special tokens (e.g. padding/start) — verify
            # against the model's vocabulary.
            step_idx = 0
            while step_idx <= 1:
                step_idx = weighted_pick(scores)
            step = idx_to_label[step_idx]
            selected_steps.append(step)
            step_prev = step
        assert len(placed_times) == len(selected_steps)

        print 'Creating chart text'
        # Quantize step times to _HZ-frame subdivisions and round the chart
        # length up to a whole number of _SUBDIV-subdivision measures.
        time_to_step = {int(round(t * _HZ)) : step for t, step in zip(placed_times, selected_steps)}
        max_subdiv = max(time_to_step.keys())
        if max_subdiv % _SUBDIV != 0:
            max_subdiv += _SUBDIV - (max_subdiv % _SUBDIV)
        # NOTE(review): xrange(max_subdiv) excludes index max_subdiv, so a
        # final step landing exactly on a measure boundary (when max_subdiv
        # was already a multiple of _SUBDIV) would be dropped — confirm.
        full_steps = [time_to_step.get(i, '0000') for i in xrange(max_subdiv)]
        measures = [full_steps[i:i+_SUBDIV] for i in xrange(0, max_subdiv, _SUBDIV)]
        measures_txt = '\n,\n'.join(['\n'.join(measure) for measure in measures])
        chart_txt = _CHART_TEMPL.format(
            ccoarse=_DIFFS[coarse],
            cfine=fine,
            measures=measures_txt
        )
        diff_chart_txts.append(chart_txt)

    print 'Creating SM'
    # The output audio is renamed after the directory, keeping its extension.
    out_dir_name = os.path.split(out_dir)[1]
    audio_out_name = out_dir_name + os.path.splitext(audio_fp)[1]
    sm_txt = _TEMPL.format(
        title=title,
        artist=artist,
        music_fp=audio_out_name,
        bpm=_BPM,
        charts='\n'.join(diff_chart_txts))

    print 'Saving to {}'.format(out_dir)
    try:
        os.mkdir(out_dir)
        audio_ext = os.path.splitext(audio_fp)[1]
        shutil.copyfile(audio_fp, os.path.join(out_dir, audio_out_name))
        with open(os.path.join(out_dir, out_dir_name + '.sm'), 'w') as f:
            f.write(sm_txt)
    except:
        raise CreateChartException('Error during output')

    if delete_audio:
        try:
            os.remove(audio_fp)
        except:
            raise CreateChartException('Error deleting audio')

    return True
Example 6
def extract_melody(path=None,
                   audio=None,
                   sf=44100,
                   quantise=False,
                   verbose=False,
                   **kwargs):
    '''Automatically extract the musical note sequence from raw audio input
    of natural speech.

    Parameters
    ----------
    path : str, optional
        File path to audio file. If None then must supply `audio` (and associated
        `sf`). Default None.
    audio : array_like, optional
        Numpy array containing the audio sampled at sample frequency `sf`. Required
        if `path` is not provided. Default None.
    sf : {32000, 44100, 48000}, optional
        The sample frequency (in Hertz) of the audio array `audio`, if provided.
        Ignored if `path` is used. Default 44100.
    quantise : bool, optional
        Whether to quantise the note values to match those of MIDI notes (where A=440Hz).
        Default False.
    verbose : bool, optional
        Print details. Default False.
    **kwargs : optional
        Keyword arguments to pass onto `segment_notes()` and `get_notes()`.


    Returns
    -------
    ns : array
        Note start times (in seconds).
    nl : array
        Note lengths (in seconds).
    nv : array
        Note values (in Hertz).


    Raises
    ------
    ValueError
        If neither `path` nor `audio` is provided.
    ValueError
        If `path` cannot be found.

    '''

    # Use identity checks against None (idiomatic, and robust to arrays
    # whose __eq__ is elementwise) instead of type() comparisons.
    if path is not None:
        if not os.path.isfile(path):
            raise ValueError('Path {} not found.'.format(path))

        # Read the file's sample rate from its metadata (field 10 of the
        # MetadataReader output), then load the audio with equal-loudness
        # filtering at that rate.
        sf = MetadataReader(filename=path)()[10]
        audio = EqloudLoader(filename=path, sampleRate=sf)()

    elif audio is None:
        raise ValueError(
            'Must provide either filepath (`path`) or array of audio data (`audio`).'
        )

    if sf not in {32000, 44100, 48000}:
        raise ValueError(
            'Sample frequency `sf` must be in {32000, 44100, 48000}.')

    # Sound object as parsed by Praat
    sound = parselmouth.Sound(values=np.asarray(audio, dtype=np.float64),
                              sampling_frequency=sf)

    # Pitch and Intensity contours, both sampled every 10 ms.
    p = sound.to_pitch_ac(time_step=0.01, octave_jump_cost=0.6)
    I = sound.to_intensity(minimum_pitch=50, time_step=0.01)

    end_time = sound.end_time

    if verbose:
        print('- Loaded audio: sf={}, length={:.2f} seconds'.format(
            sf, end_time))

    # Segment the contours into candidate note nuclei, then derive the
    # note start times / lengths / pitch values from them.
    _, _, nuclei = segment_notes(I, p, verbose=verbose, **kwargs)

    ns, nl, nv = get_notes(I, p, nuclei, verbose=verbose, **kwargs)

    if verbose:
        print('== Found {} notes.'.format(len(nv)))

    if quantise:
        if verbose:
            print('- quantising note values')
        nv = quantise_notes(nv)

    if verbose:
        print('\n== Done')

    return ns, nl, nv
Example 7
# Configure the EchoNest API key and read the CLI arguments:
# argv[1] is the directory to scan, argv[2] the output file path.
echo_conf.ECHO_NEST_API_KEY = api_key
path = sys.argv[1]
output_file = sys.argv[2]

files = []

# find files that need to be analyzed
for dirpath, dirnames, filenames in os.walk(path):
    for filename in [f for f in filenames if f.endswith(".mp3")]:
        files.append(os.path.join(dirpath, filename))

all_files = []
count = 0

# For each mp3: read title/artist/year tags locally with essentia, then
# submit the file to the EchoNest API, retrying every 60 s until the
# remote analysis succeeds (e.g. after rate limiting).
for filename in files:
    metadata_reader = MetadataReader(filename=filename)
    metadata = metadata_reader()
    track_title = metadata[0]
    track_artist = metadata[1]
    track_year = metadata[6]
    while True:
        try:
            pytrack = echo_track.track_from_filename(filename)
            pytrack.get_analysis()
            break
        except:
            # NOTE(review): bare except also swallows KeyboardInterrupt,
            # making the loop hard to abort — presumably meant to catch
            # API/network errors only; confirm and narrow.
            print "Error encountered"
            time.sleep(60)
    echonest_id = pytrack.id
    count += 1
    print "Files analyzed: ", count