def acoustic_analysis(corpus_context):
    """Run acoustic analysis over every sound file in the corpus.

    Each discourse's sound file is cut into per-utterance wav chunks in a
    temporary directory; if the utterance query fails, the whole file is
    used instead. Pitch analysis is then run on the result.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        Corpus to analyze.
    """
    sound_files = corpus_context.sql_session.query(SoundFile).join(Discourse).all()
    log = logging.getLogger('{}_acoustics'.format(corpus_context.corpus_name))
    log.info('Beginning acoustic analysis for {} corpus...'.format(corpus_context.corpus_name))
    started = time.time()
    for sound_file in sound_files:
        log.info('Begin acoustic analysis for {}...'.format(sound_file.filepath))
        file_started = time.time()
        try:
            # Chop the discourse into utterance-sized wav files so the
            # downstream analysis works on short chunks.
            query = corpus_context.query_graph(corpus_context.utterance)
            query = query.filter(corpus_context.utterance.discourse == sound_file.discourse.name).times()
            utterances = query.all()
            temp_dir = corpus_context.config.temporary_directory(sound_file.discourse.name)
            for utt in utterances:
                chunk_path = os.path.join(temp_dir, 'temp-{}-{}.wav'.format(utt.begin, utt.end))
                extract_audio(sound_file.filepath, chunk_path, utt.begin, utt.end, padding=padding)
            path = temp_dir
        except GraphQueryError:
            # No utterance annotations available; fall back to the whole file.
            path = sound_file.filepath
        # NOTE(review): analyze_pitch is called with three arguments here, but
        # the definitions later in this file take only two — confirm which
        # signature is current.
        analyze_pitch(corpus_context, sound_file, path)
        #analyze_formants(corpus_context, sound_file, path)
        log.info('Acoustic analysis finished!')
        log.debug('Acoustic analysis took: {} seconds'.format(time.time() - file_started))
        # NOTE(review): this break stops after the first sound file — looks
        # like a debugging leftover; confirm before removing.
        break
    log.info('Finished acoustic analysis for {} corpus!'.format(corpus_context.corpus_name))
    log.debug('Total time taken: {} seconds'.format(time.time() - started))
def acoustic_analysis(corpus_context):
    """Perform acoustic analysis for all sound files in a corpus.

    Extracts per-utterance audio chunks into a temporary directory when
    utterance annotations exist (otherwise uses the full file path) and
    runs pitch analysis on the result, with timing logged per file.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        Corpus to analyze.
    """
    sound_files = corpus_context.sql_session.query(SoundFile).join(Discourse).all()
    log = logging.getLogger('{}_acoustics'.format(corpus_context.corpus_name))
    log.info('Beginning acoustic analysis for {} corpus...'.format(corpus_context.corpus_name))
    initial_begin = time.time()
    for sf in sound_files:
        log.info('Begin acoustic analysis for {}...'.format(sf.filepath))
        log_begin = time.time()
        try:
            utterance_query = corpus_context.query_graph(corpus_context.utterance)
            utterance_query = utterance_query.filter(
                corpus_context.utterance.discourse == sf.discourse.name).times()
            out_dir = corpus_context.config.temporary_directory(sf.discourse.name)
            # One temp wav per utterance, named by its time span.
            for utt in utterance_query.all():
                wav_out = os.path.join(out_dir, 'temp-{}-{}.wav'.format(utt.begin, utt.end))
                extract_audio(sf.filepath, wav_out, utt.begin, utt.end, padding=padding)
            path = out_dir
        except GraphQueryError:
            # Utterance query unavailable for this discourse; use the raw file.
            path = sf.filepath
        # NOTE(review): the analyze_pitch definitions in this file take two
        # arguments, not three — verify this call site.
        analyze_pitch(corpus_context, sf, path)
        #analyze_formants(corpus_context, sf, path)
        log.info('Acoustic analysis finished!')
        log.debug('Acoustic analysis took: {} seconds'.format(time.time() - log_begin))
        # NOTE(review): only the first sound file is processed because of this
        # break — presumably a debug leftover; confirm.
        break
    log.info('Finished acoustic analysis for {} corpus!'.format(corpus_context.corpus_name))
    log.debug('Total time taken: {} seconds'.format(time.time() - initial_begin))
def analyze_formants(corpus_context, sound_file):
    """Compute and store formant (F1-F3) measurements for a sound file.

    The formant extractor is chosen from
    ``corpus_context.config.formant_algorithm`` ('praat' or the default
    acousticsim extractor). Files longer than 5 seconds are split into
    utterance chunks, analyzed in parallel, and chunk-relative time points
    are shifted back to file time. Rows are added to the SQL session as
    :class:`Formants` objects (not flushed here).

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        Corpus the sound file belongs to.
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        The .wav sound file to analyze.
    """
    algorithm = corpus_context.config.formant_algorithm
    if algorithm == 'praat':
        if getattr(corpus_context.config, 'praat_path', None) is None:
            return  # praat requested but no praat binary configured
        formant_function = partial(PraatFormants,
                                   praatpath=corpus_context.config.praat_path,
                                   max_freq=5500, num_formants=5,
                                   win_len=0.025, time_step=0.01)
    else:
        formant_function = partial(ASFormants, max_freq=5500, num_formants=5,
                                   win_len=0.025, time_step=0.01)
    if sound_file.duration > 5:
        # Long file: analyze utterance-sized chunks in parallel.
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        query = corpus_context.query_graph(prob_utt)
        query = query.filter(prob_utt.discourse.name == sound_file.discourse.name).times()
        utterances = query.all()
        out_dir = corpus_context.config.temporary_directory(sound_file.discourse.name)
        path_mapping = []
        for utt in utterances:
            chunk_path = os.path.join(out_dir, 'temp-{}-{}.wav'.format(utt['begin'], utt['end']))
            if not os.path.exists(chunk_path):
                extract_audio(sound_file.filepath, chunk_path,
                              utt['begin'], utt['end'], padding=padding)
            path_mapping.append((chunk_path,))
        cache = generate_cache(path_mapping, formant_function, None,
                               default_njobs() - 1, None, None)
        for chunk_path, track in cache.items():
            # Recover the chunk's original time span from its file name
            # ('temp-{begin}-{end}').
            base = os.path.splitext(os.path.basename(chunk_path))[0]
            _, begin, end = base.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            end = float(end)  # parsed but not otherwise used
            for timepoint, value in track.items():
                timepoint += begin  # chunk time -> file time
                f1, f2, f3 = sanitize_formants(value)
                corpus_context.sql_session.add(
                    Formants(sound_file=sound_file, time=timepoint,
                             F1=f1, F2=f2, F3=f3, source=algorithm))
    else:
        # Short file: analyze in one shot.
        formants = formant_function(sound_file.filepath)
        for timepoint, value in formants.items():
            f1, f2, f3 = sanitize_formants(value)
            corpus_context.sql_session.add(
                Formants(sound_file=sound_file, time=timepoint,
                         F1=f1, F2=f2, F3=f3, source=algorithm))
def analyze_pitch(corpus_context, sound_file):
    """Compute and store pitch (F0) tracks for a sound file.

    The pitch tracker is chosen from
    ``corpus_context.config.pitch_algorithm`` ('reaper', 'praat', or the
    default acousticsim tracker). Files longer than 5 seconds are split
    into utterance chunks, analyzed in parallel, and chunk-relative time
    points shifted back to file time. Results are added to the SQL session
    as :class:`Pitch` rows and the session is flushed.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        Corpus the sound file belongs to.
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        The .wav sound file to analyze.
    """
    algorithm = corpus_context.config.pitch_algorithm
    if algorithm == 'reaper':
        if getattr(corpus_context.config, 'reaper_path', None) is None:
            return  # reaper requested but no reaper binary configured
        pitch_function = partial(ReaperPitch,
                                 reaper=corpus_context.config.reaper_path,
                                 time_step=0.01, freq_lims=(75, 500))
    elif algorithm == 'praat':
        if getattr(corpus_context.config, 'praat_path', None) is None:
            return  # praat requested but no praat binary configured
        pitch_function = partial(PraatPitch,
                                 praatpath=corpus_context.config.praat_path,
                                 time_step=0.01, freq_lims=(75, 500))
    else:
        pitch_function = partial(ASPitch, time_step=0.01, freq_lims=(75, 500))
    if sound_file.duration > 5:
        # Long file: analyze utterance-sized chunks in parallel.
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        query = corpus_context.query_graph(prob_utt)
        query = query.filter(prob_utt.discourse.name == sound_file.discourse.name).times()
        utterances = query.all()
        out_dir = corpus_context.config.temporary_directory(sound_file.discourse.name)
        for utt in utterances:
            chunk_path = os.path.join(out_dir, 'temp-{}-{}.wav'.format(utt['begin'], utt['end']))
            if not os.path.exists(chunk_path):
                extract_audio(sound_file.filepath, chunk_path,
                              utt['begin'], utt['end'], padding=padding * 3)
        # Analyze everything currently in the temp directory, not just the
        # chunks written above.
        path_mapping = [(os.path.join(out_dir, name),) for name in os.listdir(out_dir)]
        try:
            cache = generate_cache(path_mapping, pitch_function, None,
                                   default_njobs() - 1, None, None)
        except FileNotFoundError:
            return
        for chunk_path, track in cache.items():
            # Recover the chunk's time span from 'temp-{begin}-{end}' naming.
            base = os.path.splitext(os.path.basename(chunk_path))[0]
            _, begin, end = base.split('-')
            begin = float(begin) - padding * 3
            if begin < 0:
                begin = 0
            end = float(end)  # parsed but not otherwise used
            for timepoint, value in track.items():
                timepoint += begin  # chunk time -> file time
                try:
                    value = value[0]  # presumably a 1-element frame; unwrap if so
                except TypeError:
                    pass
                corpus_context.sql_session.add(
                    Pitch(sound_file=sound_file, time=timepoint,
                          F0=value, source=algorithm))
    else:
        # Short file: analyze in one shot.
        try:
            pitch = pitch_function(sound_file.filepath)
        except FileNotFoundError:
            return
        for timepoint, value in pitch.items():
            try:
                value = value[0]
            except TypeError:
                pass
            corpus_context.sql_session.add(
                Pitch(sound_file=sound_file, time=timepoint,
                      F0=value, source=algorithm))
    corpus_context.sql_session.flush()
def analyze_formants(corpus_context, sound_file):
    """Analyze formants for one sound file and queue rows on the session.

    Selects the extractor from ``corpus_context.config.formant_algorithm``
    ('praat' or the acousticsim default), runs it over utterance chunks
    for files longer than 5 seconds (in parallel, shifting chunk times
    back to file time) or over the whole file otherwise, and adds
    :class:`Formants` rows to the SQL session.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        Corpus the sound file belongs to.
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        The .wav sound file to analyze.
    """
    config = corpus_context.config
    algorithm = config.formant_algorithm
    if algorithm == 'praat':
        praat_path = getattr(config, 'praat_path', None)
        if praat_path is None:
            # Praat was requested but is not configured; nothing to do.
            return
        formant_function = partial(PraatFormants, praatpath=praat_path,
                                   max_freq=5500, num_formants=5,
                                   win_len=0.025, time_step=0.01)
    else:
        formant_function = partial(ASFormants, max_freq=5500,
                                   num_formants=5, win_len=0.025,
                                   time_step=0.01)

    def _record(time_point, raw_value):
        # Sanitize one measurement and queue it as a Formants row.
        f1, f2, f3 = sanitize_formants(raw_value)
        row = Formants(sound_file=sound_file, time=time_point,
                       F1=f1, F2=f2, F3=f3, source=algorithm)
        corpus_context.sql_session.add(row)

    if sound_file.duration <= 5:
        # Short enough to analyze directly.
        for time_point, raw_value in formant_function(sound_file.filepath).items():
            _record(time_point, raw_value)
        return
    # Long file: extract per-utterance chunks and analyze them in parallel.
    annotation_type = corpus_context.hierarchy.highest
    utterance_attr = getattr(corpus_context, annotation_type)
    times_query = corpus_context.query_graph(utterance_attr)
    times_query = times_query.filter(
        utterance_attr.discourse.name == sound_file.discourse.name).times()
    temp_dir = config.temporary_directory(sound_file.discourse.name)
    path_mapping = []
    for utt in times_query.all():
        chunk = os.path.join(temp_dir, 'temp-{}-{}.wav'.format(utt['begin'], utt['end']))
        if not os.path.exists(chunk):
            extract_audio(sound_file.filepath, chunk,
                          utt['begin'], utt['end'], padding=padding)
        path_mapping.append((chunk,))
    cache = generate_cache(path_mapping, formant_function, None,
                           default_njobs() - 1, None, None)
    for chunk, track in cache.items():
        stem = os.path.splitext(os.path.basename(chunk))[0]
        _, chunk_begin, chunk_end = stem.split('-')
        offset = float(chunk_begin) - padding
        if offset < 0:
            offset = 0
        chunk_end = float(chunk_end)  # parsed but unused downstream
        for time_point, raw_value in track.items():
            # Shift chunk-relative time back into file time.
            _record(time_point + offset, raw_value)
def analyze_pitch(corpus_context, sound_file):
    """Analyze pitch for one sound file and queue F0 rows on the session.

    Selects the tracker from ``corpus_context.config.pitch_algorithm``
    ('reaper', 'praat', or the acousticsim default). Files longer than 5
    seconds are split into utterance chunks and analyzed in parallel,
    with chunk-relative times shifted back to file time. :class:`Pitch`
    rows are added to the SQL session and the session is flushed.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        Corpus the sound file belongs to.
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        The .wav sound file to analyze.
    """
    config = corpus_context.config
    algorithm = config.pitch_algorithm
    if algorithm == 'reaper':
        reaper_path = getattr(config, 'reaper_path', None)
        if reaper_path is None:
            return  # reaper requested but not configured
        pitch_function = partial(ReaperPitch, reaper=reaper_path,
                                 time_step=0.01, freq_lims=(75, 500))
    elif algorithm == 'praat':
        praat_path = getattr(config, 'praat_path', None)
        if praat_path is None:
            return  # praat requested but not configured
        pitch_function = partial(PraatPitch, praatpath=praat_path,
                                 time_step=0.01, freq_lims=(75, 500))
    else:
        pitch_function = partial(ASPitch, time_step=0.01, freq_lims=(75, 500))

    def _store(time_point, raw_value):
        # Unwrap single-element frames (presumably what some trackers
        # return), then queue a Pitch row.
        try:
            raw_value = raw_value[0]
        except TypeError:
            pass
        corpus_context.sql_session.add(
            Pitch(sound_file=sound_file, time=time_point,
                  F0=raw_value, source=algorithm))

    if sound_file.duration > 5:
        # Long file: analyze per-utterance chunks in parallel.
        annotation_type = corpus_context.hierarchy.highest
        utterance_attr = getattr(corpus_context, annotation_type)
        times_query = corpus_context.query_graph(utterance_attr)
        times_query = times_query.filter(
            utterance_attr.discourse.name == sound_file.discourse.name).times()
        temp_dir = config.temporary_directory(sound_file.discourse.name)
        for utt in times_query.all():
            chunk = os.path.join(temp_dir, 'temp-{}-{}.wav'.format(utt['begin'], utt['end']))
            if not os.path.exists(chunk):
                extract_audio(sound_file.filepath, chunk,
                              utt['begin'], utt['end'], padding=padding * 3)
        # Every file in the temp dir is fed to the tracker.
        path_mapping = [(os.path.join(temp_dir, name),) for name in os.listdir(temp_dir)]
        try:
            cache = generate_cache(path_mapping, pitch_function, None,
                                   default_njobs() - 1, None, None)
        except FileNotFoundError:
            return
        for chunk, track in cache.items():
            stem = os.path.splitext(os.path.basename(chunk))[0]
            _, chunk_begin, chunk_end = stem.split('-')
            offset = float(chunk_begin) - padding * 3
            if offset < 0:
                offset = 0
            chunk_end = float(chunk_end)  # parsed but unused downstream
            for time_point, raw_value in track.items():
                # Shift chunk-relative time back into file time.
                _store(time_point + offset, raw_value)
    else:
        # Short file: analyze in one shot.
        try:
            track = pitch_function(sound_file.filepath)
        except FileNotFoundError:
            return
        for time_point, raw_value in track.items():
            _store(time_point, raw_value)
    corpus_context.sql_session.flush()
print([(x._type_node['label'], x.begin, x.duration) for x in u.word]) error transcription = ' '.join(x[0] for x in transcription) begin = u.begin - 0.075 if begin < 0: begin = 0 end = u.end + 0.075 if end > duration: end = duration utterance_tier.add(begin, end, transcription) utt_duration = end - begin utt_name = '{}_{}_{}'.format(d, begin, end) utt_wav_path = os.path.join(lab_dir, utt_name + '.wav') if not os.path.exists(utt_wav_path): extract_audio(wav_path, utt_wav_path, begin, end, padding=0) lab_path = os.path.join(lab_dir, utt_name + '.lab') with open(lab_path, 'w') as f: f.write(transcription) trans_path = os.path.join(lab_dir, utt_name + '.txt') with open(trans_path, 'w') as f: f.write('{}\t{}\t0\t{}\t{}'.format(speaker, speaker, utt_duration, transcription)) utt_tg_path = os.path.join(lab_dir, utt_name + '.TextGrid') utt_tg = TextGrid(maxTime=utt_duration) utt_word_tier = IntervalTier(name='words', maxTime=utt_duration) utt_phone_tier = IntervalTier(name='phones', maxTime=utt_duration) for w in u.word: label = w._type_node['label'] if label in ['<NOISE>', '<VOCNOISE>']:
durations.append(interval.maxTime - interval.minTime) max_duration = max(durations) min_duration = min(durations) min_thresh = 0.01 max_thresh = 0.05 segs = [] for interval in word_tier: if interval.mark == '': continue print(interval.mark, interval.minTime, interval.maxTime) outpath = os.path.join(temp_wav_dir, interval.mark + '.wav') extract_audio(wav_path, outpath, interval.minTime, interval.maxTime, padding=padding) rep = Mfcc(outpath, freq_lims=(80, 7800), num_coeffs=12, win_len=0.025, time_step=0.01) rep.is_windowed = True duration = interval.maxTime - interval.minTime thresh = unnorm(norm(duration, min_duration, max_duration), min_thresh, max_thresh) rep.segment(threshold=thresh) print(sorted(rep._segments.keys())) padded_begin = interval.minTime - padding if padded_begin < 0:
if interval.mark == '': continue durations.append(interval.maxTime - interval.minTime) max_duration = max(durations) min_duration = min(durations) min_thresh = 0.01 max_thresh = 0.05 segs = [] for interval in word_tier: if interval.mark == '': continue print(interval.mark, interval.minTime, interval.maxTime) outpath = os.path.join(temp_wav_dir, interval.mark + '.wav') extract_audio(wav_path, outpath, interval.minTime, interval.maxTime, padding = padding) rep = Mfcc(outpath, freq_lims = (80, 7800), num_coeffs = 12, win_len = 0.025, time_step = 0.01) rep.is_windowed = True duration = interval.maxTime - interval.minTime thresh = unnorm(norm(duration, min_duration, max_duration), min_thresh, max_thresh) rep.segment(threshold = thresh) print(sorted(rep._segments.keys())) padded_begin = interval.minTime - padding if padded_begin < 0: padded_begin = 0 for k in sorted(rep._segments.keys()): with open(os.path.join(temp_mfcc_dir, '{}.mfcc'.format(seg_ind)), 'wb') as fh: pickle.dump(rep[k[0],k[1]], fh) with open(os.path.join(temp_mean_dir, '{}.mean'.format(seg_ind)), 'wb') as fh: pickle.dump(rep._segments[k], fh) segs.append(str(seg_ind))