Beispiel #1
0
def analyze_formants(corpus_context, sound_file, sound_file_path):
    """Compute formant tracks (F1-F3) and stage them in the SQL session.

    Praat is used when ``corpus_context.config.praat_path`` is set; otherwise
    the analysis falls back to acousticsim.  When ``sound_file_path`` is a
    directory, every segment file inside it is analyzed through the parallel
    cache and each timepoint is shifted back to discourse time using the
    ``<prefix>-<begin>-<end>`` file-name convention.

    Parameters
    ----------
    corpus_context : polyglotdb corpus context
        Provides the analysis configuration and the SQL session.
    sound_file : SoundFile model instance
        Record the generated ``Formants`` rows are attached to.
    sound_file_path : str
        Path to a single .wav file, or to a directory of segment files.
    """
    if getattr(corpus_context.config, 'praat_path', None) is not None:
        formant_function = partial(PraatFormants,
                                   praatpath=corpus_context.config.praat_path,
                                   max_freq=5500,
                                   num_formants=5,
                                   win_len=0.025,
                                   time_step=0.01)
        algorithm = 'praat'
    else:
        formant_function = partial(ASFormants,
                                   max_freq=5500,
                                   num_formants=5,
                                   win_len=0.025,
                                   time_step=0.01)
        algorithm = 'acousticsim'
    if os.path.isdir(sound_file_path):
        path_mapping = [(os.path.join(sound_file_path, x), )
                        for x in os.listdir(sound_file_path)]

        cache = generate_cache(path_mapping, formant_function, None,
                               default_njobs(), None, None)
        for k, v in cache.items():
            # Segment files are named '<prefix>-<begin>-<end>'; recover the
            # true start time by undoing the extraction padding.  The end
            # component is not needed here.
            name = os.path.splitext(os.path.basename(k))[0]
            _, begin, _ = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            for timepoint, value in v.items():
                timepoint += begin  # shift to discourse (true) time
                f1, f2, f3 = sanitize_formants(value)
                f = Formants(sound_file=sound_file,
                             time=timepoint,
                             F1=f1,
                             F2=f2,
                             F3=f3,
                             source=algorithm)
                corpus_context.sql_session.add(f)
    else:
        formants = formant_function(sound_file_path)
        for timepoint, value in formants.items():
            f1, f2, f3 = sanitize_formants(value)
            f = Formants(sound_file=sound_file,
                         time=timepoint,
                         F1=f1,
                         F2=f2,
                         F3=f3,
                         source=algorithm)
            corpus_context.sql_session.add(f)
def analyze_pitch(corpus_context, sound_file, sound_file_path):
    """Compute a pitch (F0) track and stage it in the SQL session.

    Algorithm preference order: Reaper if configured, then Praat, then the
    acousticsim fallback.  When ``sound_file_path`` is a directory, every
    segment file inside it is analyzed through the parallel cache and each
    timepoint is shifted back to discourse time using the
    ``<prefix>-<begin>-<end>`` file-name convention.

    Parameters
    ----------
    corpus_context : polyglotdb corpus context
        Provides the analysis configuration and the SQL session.
    sound_file : SoundFile model instance
        Record the generated ``Pitch`` rows are attached to.
    sound_file_path : str
        Path to a single .wav file, or to a directory of segment files.
    """
    if getattr(corpus_context.config, 'reaper_path', None) is not None:
        pitch_function = partial(ReaperPitch,
                                 reaper=corpus_context.config.reaper_path,
                                 time_step=0.01, freq_lims=(75, 500))
        algorithm = 'reaper'
    elif getattr(corpus_context.config, 'praat_path', None) is not None:
        pitch_function = partial(PraatPitch,
                                 praatpath=corpus_context.config.praat_path,
                                 time_step=0.01, freq_lims=(75, 500))
        algorithm = 'praat'
    else:
        pitch_function = partial(ASPitch, time_step=0.01, freq_lims=(75, 500))
        algorithm = 'acousticsim'
    if os.path.isdir(sound_file_path):
        path_mapping = [(os.path.join(sound_file_path, x),) for x in os.listdir(sound_file_path)]

        cache = generate_cache(path_mapping, pitch_function, None, default_njobs(), None, None)
        for k, v in cache.items():
            # Segment files are named '<prefix>-<begin>-<end>'; recover the
            # true start time by undoing the extraction padding.
            name = os.path.splitext(os.path.basename(k))[0]
            _, begin, _ = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            for timepoint, value in v.items():
                timepoint += begin  # shift to discourse (true) time
                try:
                    # Some backends return the F0 wrapped in a sequence.
                    value = value[0]
                except TypeError:
                    pass
                p = Pitch(sound_file=sound_file, time=timepoint, F0=value, source=algorithm)
                corpus_context.sql_session.add(p)
    else:
        pitch = pitch_function(sound_file_path)
        pitch.process()
        for timepoint, value in pitch.items():
            try:
                value = value[0]
            except TypeError:
                pass
            p = Pitch(sound_file=sound_file, time=timepoint, F0=value, source=algorithm)
            corpus_context.sql_session.add(p)
def analyze_formants(corpus_context, sound_file, sound_file_path):
    """Compute formant tracks (F1-F3) and stage them in the SQL session.

    Uses Praat when ``corpus_context.config.praat_path`` is configured and
    falls back to acousticsim otherwise.  A directory argument triggers
    parallel per-segment analysis with timepoints shifted back to discourse
    time via the ``<prefix>-<begin>-<end>`` file-name convention.

    Parameters
    ----------
    corpus_context : polyglotdb corpus context
        Provides the analysis configuration and the SQL session.
    sound_file : SoundFile model instance
        Record the generated ``Formants`` rows are attached to.
    sound_file_path : str
        Path to a single .wav file, or to a directory of segment files.
    """
    if getattr(corpus_context.config, 'praat_path', None) is not None:
        formant_function = partial(PraatFormants,
                                   praatpath=corpus_context.config.praat_path,
                                   max_freq=5500, num_formants=5,
                                   win_len=0.025, time_step=0.01)
        algorithm = 'praat'
    else:
        formant_function = partial(ASFormants, max_freq=5500,
                                   num_formants=5, win_len=0.025,
                                   time_step=0.01)
        algorithm = 'acousticsim'
    if os.path.isdir(sound_file_path):
        path_mapping = [(os.path.join(sound_file_path, x),) for x in os.listdir(sound_file_path)]

        cache = generate_cache(path_mapping, formant_function, None, default_njobs(), None, None)
        for k, v in cache.items():
            # File names encode '<prefix>-<begin>-<end>'; undo the extraction
            # padding to get the segment's true start time.
            name = os.path.splitext(os.path.basename(k))[0]
            _, begin, _ = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            for timepoint, value in v.items():
                timepoint += begin  # shift to discourse (true) time
                f1, f2, f3 = sanitize_formants(value)
                f = Formants(sound_file=sound_file, time=timepoint, F1=f1,
                             F2=f2, F3=f3, source=algorithm)
                corpus_context.sql_session.add(f)
    else:
        formants = formant_function(sound_file_path)
        for timepoint, value in formants.items():
            f1, f2, f3 = sanitize_formants(value)
            f = Formants(sound_file=sound_file, time=timepoint, F1=f1,
                         F2=f2, F3=f3, source=algorithm)
            corpus_context.sql_session.add(f)
Beispiel #4
0
def analyze_formants(corpus_context, sound_file):
    """Analyze formants (F1-F3) of a sound file and stage them in the SQL session.

    The algorithm is selected via ``corpus_context.config.formant_algorithm``:
    'praat' uses Praat (returning silently if no Praat path is configured),
    anything else uses acousticsim.  Files longer than 5 seconds are split
    into per-utterance temp files and analyzed in parallel.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        Corpus context providing configuration, graph queries and the
        SQL session.
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        The .wav sound file to analyze.
    """
    algorithm = corpus_context.config.formant_algorithm
    if algorithm == 'praat':
        if getattr(corpus_context.config, 'praat_path', None) is not None:
            formant_function = partial(
                PraatFormants,
                praatpath=corpus_context.config.praat_path,
                max_freq=5500,
                num_formants=5,
                win_len=0.025,
                time_step=0.01)
        else:
            # Praat requested but not available; nothing to do.
            return
    else:
        formant_function = partial(ASFormants,
                                   max_freq=5500,
                                   num_formants=5,
                                   win_len=0.025,
                                   time_step=0.01)
    if sound_file.duration > 5:
        # Long file: split into utterance-sized chunks and analyze in parallel.
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(
            prob_utt.discourse.name == sound_file.discourse.name).times()
        utterances = q.all()

        outdir = corpus_context.config.temporary_directory(
            sound_file.discourse.name)
        path_mapping = []
        for i, u in enumerate(utterances):
            outpath = os.path.join(
                outdir, 'temp-{}-{}.wav'.format(u['begin'], u['end']))
            if not os.path.exists(outpath):
                extract_audio(sound_file.filepath,
                              outpath,
                              u['begin'],
                              u['end'],
                              padding=padding)
            path_mapping.append((outpath, ))

        cache = generate_cache(path_mapping, formant_function, None,
                               default_njobs() - 1, None, None)
        for k, v in cache.items():
            # Temp files are named 'temp-<begin>-<end>.wav'; undo the
            # extraction padding to recover the true start time.
            name = os.path.splitext(os.path.basename(k))[0]
            _, begin, _ = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            for timepoint, value in v.items():
                timepoint += begin  # shift to discourse (true) time
                f1, f2, f3 = sanitize_formants(value)
                f = Formants(sound_file=sound_file,
                             time=timepoint,
                             F1=f1,
                             F2=f2,
                             F3=f3,
                             source=algorithm)
                corpus_context.sql_session.add(f)
    else:
        formants = formant_function(sound_file.filepath)
        for timepoint, value in formants.items():
            f1, f2, f3 = sanitize_formants(value)
            f = Formants(sound_file=sound_file,
                         time=timepoint,
                         F1=f1,
                         F2=f2,
                         F3=f3,
                         source=algorithm)
            corpus_context.sql_session.add(f)
Beispiel #5
0
def analyze_pitch(corpus_context, sound_file):
    """Analyze pitch (F0) of a sound file and stage it in the SQL session.

    The algorithm is selected via ``corpus_context.config.pitch_algorithm``:
    'reaper' or 'praat' require the corresponding executable path to be
    configured (otherwise the function returns silently); anything else uses
    acousticsim.  Files longer than 5 seconds are split into per-utterance
    temp files and analyzed in parallel.  The session is flushed at the end.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        Corpus context providing configuration, graph queries and the
        SQL session.
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        The .wav sound file to analyze.
    """
    algorithm = corpus_context.config.pitch_algorithm
    if algorithm == 'reaper':
        if getattr(corpus_context.config, 'reaper_path', None) is not None:
            pitch_function = partial(ReaperPitch,
                                     reaper=corpus_context.config.reaper_path,
                                     time_step=0.01,
                                     freq_lims=(75, 500))
        else:
            return
    elif algorithm == 'praat':
        if getattr(corpus_context.config, 'praat_path', None) is not None:
            pitch_function = partial(
                PraatPitch,
                praatpath=corpus_context.config.praat_path,
                time_step=0.01,
                freq_lims=(75, 500))
        else:
            return
    else:
        pitch_function = partial(ASPitch, time_step=0.01, freq_lims=(75, 500))

    if sound_file.duration > 5:
        # Long file: split into utterance-sized chunks and analyze in parallel.
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(
            prob_utt.discourse.name == sound_file.discourse.name).times()
        utterances = q.all()

        outdir = corpus_context.config.temporary_directory(
            sound_file.discourse.name)
        for i, u in enumerate(utterances):
            outpath = os.path.join(
                outdir, 'temp-{}-{}.wav'.format(u['begin'], u['end']))
            if not os.path.exists(outpath):
                # Triple padding gives pitch trackers more context at edges.
                extract_audio(sound_file.filepath,
                              outpath,
                              u['begin'],
                              u['end'],
                              padding=padding * 3)

        path_mapping = [(os.path.join(outdir, x), )
                        for x in os.listdir(outdir)]
        try:
            cache = generate_cache(path_mapping, pitch_function, None,
                                   default_njobs() - 1, None, None)
        except FileNotFoundError:
            # Analysis executable or temp file missing; best-effort skip.
            return
        for k, v in cache.items():
            # Temp files are named 'temp-<begin>-<end>.wav'; undo the
            # extraction padding to recover the true start time.
            name = os.path.splitext(os.path.basename(k))[0]
            _, begin, _ = name.split('-')
            begin = float(begin) - padding * 3
            if begin < 0:
                begin = 0
            for timepoint, value in v.items():
                timepoint += begin  # shift to discourse (true) time
                try:
                    # Some backends wrap F0 in a sequence.
                    value = value[0]
                except TypeError:
                    pass
                p = Pitch(sound_file=sound_file,
                          time=timepoint,
                          F0=value,
                          source=algorithm)
                corpus_context.sql_session.add(p)
    else:
        try:
            pitch = pitch_function(sound_file.filepath)
        except FileNotFoundError:
            return
        for timepoint, value in pitch.items():
            try:
                value = value[0]
            except TypeError:
                pass
            p = Pitch(sound_file=sound_file,
                      time=timepoint,
                      F0=value,
                      source=algorithm)
            corpus_context.sql_session.add(p)
    corpus_context.sql_session.flush()
Beispiel #6
0
def analyze_formants(corpus_context, sound_file):
    """Analyze formants (F1-F3) of a sound file and stage them in the SQL session.

    ``corpus_context.config.formant_algorithm`` selects the backend: 'praat'
    uses Praat (returning silently when no Praat path is configured), any
    other value uses acousticsim.  Files longer than 5 seconds are split into
    per-utterance temp files and analyzed in parallel.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        Corpus context providing configuration, graph queries and the
        SQL session.
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        The .wav sound file to analyze.
    """
    algorithm = corpus_context.config.formant_algorithm
    if algorithm == 'praat':
        if getattr(corpus_context.config, 'praat_path', None) is not None:
            formant_function = partial(PraatFormants,
                                       praatpath=corpus_context.config.praat_path,
                                       max_freq=5500, num_formants=5,
                                       win_len=0.025, time_step=0.01)
        else:
            return
    else:
        formant_function = partial(ASFormants, max_freq=5500,
                                   num_formants=5, win_len=0.025,
                                   time_step=0.01)
    if sound_file.duration > 5:
        # Long file: split into utterance-sized chunks and analyze in parallel.
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(prob_utt.discourse.name == sound_file.discourse.name).times()
        utterances = q.all()

        outdir = corpus_context.config.temporary_directory(sound_file.discourse.name)
        path_mapping = []
        for i, u in enumerate(utterances):
            outpath = os.path.join(outdir, 'temp-{}-{}.wav'.format(u['begin'], u['end']))
            if not os.path.exists(outpath):
                extract_audio(sound_file.filepath, outpath, u['begin'], u['end'], padding=padding)
            path_mapping.append((outpath,))

        cache = generate_cache(path_mapping, formant_function, None, default_njobs() - 1, None, None)
        for k, v in cache.items():
            # Temp files are named 'temp-<begin>-<end>.wav'; undo the
            # extraction padding to recover the true start time.
            name = os.path.splitext(os.path.basename(k))[0]
            _, begin, _ = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            for timepoint, value in v.items():
                timepoint += begin  # shift to discourse (true) time
                f1, f2, f3 = sanitize_formants(value)
                f = Formants(sound_file=sound_file, time=timepoint, F1=f1,
                             F2=f2, F3=f3, source=algorithm)
                corpus_context.sql_session.add(f)
    else:
        formants = formant_function(sound_file.filepath)
        for timepoint, value in formants.items():
            f1, f2, f3 = sanitize_formants(value)
            f = Formants(sound_file=sound_file, time=timepoint, F1=f1,
                         F2=f2, F3=f3, source=algorithm)
            corpus_context.sql_session.add(f)
Beispiel #7
0
def analyze_pitch(corpus_context, sound_file):
    """Analyze pitch (F0) of a sound file and stage it in the SQL session.

    ``corpus_context.config.pitch_algorithm`` selects the backend: 'reaper'
    and 'praat' require the corresponding executable path (otherwise the
    function returns silently); any other value uses acousticsim.  Files
    longer than 5 seconds are split into per-utterance temp files and
    analyzed in parallel.  The session is flushed at the end.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        Corpus context providing configuration, graph queries and the
        SQL session.
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        The .wav sound file to analyze.
    """
    algorithm = corpus_context.config.pitch_algorithm
    if algorithm == 'reaper':
        if getattr(corpus_context.config, 'reaper_path', None) is not None:
            pitch_function = partial(ReaperPitch,
                                     reaper=corpus_context.config.reaper_path,
                                     time_step=0.01, freq_lims=(75, 500))
        else:
            return
    elif algorithm == 'praat':
        if getattr(corpus_context.config, 'praat_path', None) is not None:
            pitch_function = partial(PraatPitch,
                                     praatpath=corpus_context.config.praat_path,
                                     time_step=0.01, freq_lims=(75, 500))
        else:
            return
    else:
        pitch_function = partial(ASPitch, time_step=0.01, freq_lims=(75, 500))

    if sound_file.duration > 5:
        # Long file: split into utterance-sized chunks and analyze in parallel.
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(prob_utt.discourse.name == sound_file.discourse.name).times()
        utterances = q.all()

        outdir = corpus_context.config.temporary_directory(sound_file.discourse.name)
        for i, u in enumerate(utterances):
            outpath = os.path.join(outdir, 'temp-{}-{}.wav'.format(u['begin'], u['end']))
            if not os.path.exists(outpath):
                # Triple padding gives pitch trackers more context at edges.
                extract_audio(sound_file.filepath, outpath, u['begin'], u['end'], padding=padding * 3)

        path_mapping = [(os.path.join(outdir, x),) for x in os.listdir(outdir)]
        try:
            cache = generate_cache(path_mapping, pitch_function, None, default_njobs() - 1, None, None)
        except FileNotFoundError:
            # Analysis executable or temp file missing; best-effort skip.
            return
        for k, v in cache.items():
            # Temp files are named 'temp-<begin>-<end>.wav'; undo the
            # extraction padding to recover the true start time.
            name = os.path.splitext(os.path.basename(k))[0]
            _, begin, _ = name.split('-')
            begin = float(begin) - padding * 3
            if begin < 0:
                begin = 0
            for timepoint, value in v.items():
                timepoint += begin  # shift to discourse (true) time
                try:
                    # Some backends wrap F0 in a sequence.
                    value = value[0]
                except TypeError:
                    pass
                p = Pitch(sound_file=sound_file, time=timepoint, F0=value, source=algorithm)
                corpus_context.sql_session.add(p)
    else:
        try:
            pitch = pitch_function(sound_file.filepath)
        except FileNotFoundError:
            return
        for timepoint, value in pitch.items():
            try:
                value = value[0]
            except TypeError:
                pass
            p = Pitch(sound_file=sound_file, time=timepoint, F0=value, source=algorithm)
            corpus_context.sql_session.add(p)
    corpus_context.sql_session.flush()
def acoustic_similarity_mapping(path_mapping, **kwargs):
    """Takes in an explicit mapping of full paths to .wav files to have
    acoustic similarity computed.

    Parameters
    ----------
    path_mapping : iterable of iterables
        Explicit mapping of full paths of .wav files, in the form of a
        list of tuples to be compared.
    rep : {'envelopes','mfcc'} or callable, optional
        The type of representation to convert the wav files into before
        comparing for similarity.  Amplitude envelopes will be computed
        when 'envelopes' is specified, and MFCCs will be computed when
        'mfcc' is specified (default).  A callable is used directly as the
        representation function.
    match_function : {'dtw', 'xcorr', 'dct'} or callable, optional
        How similarity/distance will be calculated.  Defaults to 'dtw' to
        use Dynamic Time Warping (can be slower) to compute distance.
        Cross-correlation can be specified with 'xcorr', which computes
        distance as the inverse of a maximum cross-correlation value
        between 0 and 1.  A callable is used directly as the distance
        function.
    num_filters : int, optional
        The number of frequency filters to use when computing representations.
        Defaults to 8 for amplitude envelopes and 26 for MFCCs.
    num_coeffs : int, optional
        The number of coefficients to use for MFCCs (not used for
        amplitude envelopes).  Default is 20, which captures speaker-
        specific information, whereas 12 would be more speaker-independent.
    freq_lims : tuple, optional
        A tuple of the minimum frequency and maximum frequency in Hertz to use
        for computing representations.  Defaults to (80, 7800) following
        Lewandowski (2012).
    output_sim : bool, optional
        If true, the function will return similarities (inverse distance).
        If false (default), distance measures will be returned instead.

    Returns
    -------
    list of tuples
        Returns a list of tuples corresponding to the `path_mapping` input,
        with a new final element in the tuple being the similarity/distance
        score for that mapping.

    Raises
    ------
    ValueError
        If `match_function` is neither a recognized string nor a callable.
    """

    stop_check = kwargs.get('stop_check', None)
    call_back = kwargs.get('call_back', None)
    rep = kwargs.get('rep', 'mfcc')
    if callable(rep):
        to_rep = rep
    else:
        to_rep = _build_to_rep(**kwargs)

    if kwargs.get('use_multi', False):
        num_cores = kwargs.get('num_cores', 1)
        # 0 means "auto": use three quarters of the available cores.
        if num_cores == 0:
            num_cores = int((3 * cpu_count()) / 4)
    else:
        num_cores = 1
    output_sim = kwargs.get('output_sim', False)

    match_function = kwargs.get('match_function', 'dtw')
    cache = kwargs.get('cache', None)
    if isinstance(match_function, str):
        if match_function == 'xcorr':
            dist_func = xcorr_distance
        elif match_function == 'dct':
            dist_func = dct_distance
        else:
            # Any other string falls back to DTW, the default.
            dist_func = dtw_distance
    elif callable(match_function):
        dist_func = match_function
    else:
        # Previously this fell through to an unbound-name NameError.
        raise ValueError('match_function must be a string or a callable')

    attributes = kwargs.get('attributes', dict())
    if cache is None:
        cache = generate_cache(path_mapping, to_rep, attributes, num_cores, call_back, stop_check)

    asim = calc_asim(path_mapping, cache, dist_func, output_sim, num_cores, call_back, stop_check)
    if kwargs.get('return_rep', False):
        return asim, cache

    return asim
Beispiel #9
0
def analyze_pitch(corpus_context, sound_file, sound_file_path):
    """Compute a pitch (F0) track and stage it in the SQL session.

    Algorithm preference order: Reaper if configured, then Praat, then the
    acousticsim fallback.  When ``sound_file_path`` is a directory, each
    segment file inside it is analyzed through the parallel cache and each
    timepoint is shifted back to discourse time using the
    ``<prefix>-<begin>-<end>`` file-name convention.  The session is flushed
    at the end.

    Parameters
    ----------
    corpus_context : polyglotdb corpus context
        Provides the analysis configuration and the SQL session.
    sound_file : SoundFile model instance
        Record the generated ``Pitch`` rows are attached to.
    sound_file_path : str
        Path to a single .wav file, or to a directory of segment files.
    """
    if getattr(corpus_context.config, 'reaper_path', None) is not None:
        # NOTE: the getattr guard already guarantees reaper_path is not None,
        # so no further None check is needed here.
        pitch_function = partial(ReaperPitch,
                                 reaper=corpus_context.config.reaper_path,
                                 time_step=0.01,
                                 freq_lims=(75, 500))
        algorithm = 'reaper'
    elif getattr(corpus_context.config, 'praat_path', None) is not None:
        pitch_function = partial(PraatPitch,
                                 praatpath=corpus_context.config.praat_path,
                                 time_step=0.01,
                                 freq_lims=(75, 500))
        algorithm = 'praat'
    else:
        pitch_function = partial(ASPitch, time_step=0.01, freq_lims=(75, 500))
        algorithm = 'acousticsim'
    if os.path.isdir(sound_file_path):
        path_mapping = [(os.path.join(sound_file_path, x), )
                        for x in os.listdir(sound_file_path)]
        try:
            cache = generate_cache(path_mapping, pitch_function, None,
                                   default_njobs(), None, None)
        except FileNotFoundError:
            # Analysis executable or segment file missing; best-effort skip.
            return
        for k, v in cache.items():
            # Segment files are named '<prefix>-<begin>-<end>'; recover the
            # true start time by undoing the extraction padding.
            name = os.path.splitext(os.path.basename(k))[0]
            _, begin, _ = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            for timepoint, value in v.items():
                timepoint += begin  # shift to discourse (true) time
                try:
                    # Some backends wrap F0 in a sequence.
                    value = value[0]
                except TypeError:
                    pass
                p = Pitch(sound_file=sound_file,
                          time=timepoint,
                          F0=value,
                          source=algorithm)
                corpus_context.sql_session.add(p)
    else:
        try:
            pitch = pitch_function(sound_file_path)
        except FileNotFoundError:
            return
        for timepoint, value in pitch.items():
            try:
                value = value[0]
            except TypeError:
                pass
            p = Pitch(sound_file=sound_file,
                      time=timepoint,
                      F0=value,
                      source=algorithm)
            corpus_context.sql_session.add(p)
    corpus_context.sql_session.flush()