def hcopy_parallel(scp_file, args):
    '''
    Run HCopy in parallel, dispatching to a cluster scheduler when one
    is available and to the local machine's cores otherwise.

    Input:
    scp_file : HTK .scp file listing the data to be processed
    args     : additional arguments passed through to HCopy
    '''
    # Presence of 'qsub' in the PATH is taken to mean a scheduler
    # (e.g. SGE) is installed; identity comparison per PEP 8.
    if which('qsub') is None:
        hcopy_parallel_multicore(scp_file, args)
    else:
        hcopy_parallel_cluster(scp_file, args)
def hcopy_parallel(scp_file, args):
    '''
    Check whether we are on a cluster with a scheduler system installed
    or we are just on a single machine, and run HCopy accordingly.

    Input:
    scp_file : HTK .scp file listing the data to be processed
    args     : additional arguments passed through to HCopy
    '''
    # No 'qsub' in the PATH -> no scheduler, fall back to multicore.
    # Use 'is None' (identity), not '== None', per PEP 8.
    if which('qsub') is None:
        hcopy_parallel_multicore(scp_file, args)
    else:
        hcopy_parallel_cluster(scp_file, args)
def create_ergodic_hmm_matlab(n_dims, n_states, feat_type, model_name,
                              covar_type='diag', model_dir=None):
    '''
    Define an ergodic HMM in HTK format by calling the MATLAB function
    define_HTK_HMM via the matlab_batcher.sh shell wrapper, which must
    be in the PATH.

    Input:
    n_dims     : feature vector dimensionality
    n_states   : number of HMM states
    feat_type  : HTK feature kind string
    model_name : name of the model to be created
    covar_type : covariance type (accepted for interface compatibility;
                 not forwarded to MATLAB by this wrapper)
    model_dir  : output directory (default: current working directory)
    '''
    if model_dir is None:
        model_dir = os.getcwd()
    elif not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # Arguments are passed to MATLAB as a single comma-separated string.
    args = '{dims},{n_states},{feat},{name},{dir}'.format(
        dims=n_dims, n_states=n_states, feat=add_slashes(feat_type),
        name=add_slashes(model_name), dir=add_slashes(model_dir))
    cmd = ['matlab_batcher.sh', 'define_HTK_HMM', args]
    # Fail early if the MATLAB wrapper script is not available.
    assert which('matlab_batcher.sh') is not None
    # ' '.join is portable, unlike the deprecated string.join.
    logging.debug(' '.join(cmd))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))
def vad_evaluate_darpa(testing_list=None, ref_annotations_list=None,
                       hyp_annotations_list=None, eval_script=None,
                       audio_dir=None, smp_period=0.01, window_length=0.02,
                       task_id='SAD', results_dir=None):
    '''
    Run the java DARPA evaluation script by calling the relevant
    MATLAB function. The MATLAB function needs to be in MATLAB path.
    A shell script (matlab_batcher.sh) that also needs to be in path
    is called to run the matlab command from the shell.

    Input:
    testing_list : list of audio files (absolute paths)
    ref_annotations_list : list of reference annotations (absolute paths)
    hyp_annotations_list : list of hypothesized annotations (absolute paths)
    eval_script : the java evaluation script
    audio_dir : directory where the audio files lie
    smp_period : frame period (in seconds)
    window_length : frame length (in seconds)
    task_id : an id for the task that is evaluated
    results_dir : directory where the results will be stored
    '''
    evaluation_scp_file = os.path.join(results_dir, 'evaluation.scp')
    test_files = lists.get_contents(testing_list)
    ref_files = lists.get_contents(ref_annotations_list)
    hyp_files = lists.get_contents(hyp_annotations_list)
    # Write one "audio reference hypothesis" triplet per line.
    # NOTE(review): map(None, ...) is the Python 2 zip_longest idiom --
    # shorter lists are padded with None. Kept for behavioral parity.
    # 'with' guarantees the file is closed even if a write fails.
    with open(evaluation_scp_file, 'w') as eval_scp:
        for t, r, h in map(None, test_files, ref_files, hyp_files):
            eval_scp.write('{} {} {}\n'.format(t, r, h))
    darpa_convert_labels_to_ids_file_list(hyp_annotations_list)
    # Arguments are passed to MATLAB as a single comma-separated string.
    args = ('{in_file},{script_path},{audio_dir},{working_dir},'
            '{task_id},{smp_period},{window_length}').format(
        in_file=add_quotes(evaluation_scp_file),
        script_path=add_quotes(eval_script),
        audio_dir=add_quotes(audio_dir),
        working_dir=add_quotes(results_dir),
        task_id=add_quotes(task_id),
        smp_period=smp_period,
        window_length=window_length)
    cmd = ['matlab_batcher.sh', 'FindPercentageFromResultFiles', args]
    # Fail early if the MATLAB wrapper script is not available.
    assert which.which('matlab_batcher.sh') is not None
    logging.debug(' '.join(cmd))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))
def initialize_gmm_kmeans(model_name, data_list, label_dir, in_model_dir,
                          out_model_dir=None):
    '''
    Initialize an HTK GMM via k-means by calling the MATLAB function
    initialize_HTK_GMM through the matlab_batcher.sh wrapper script.

    Input:
    model_name    : name of the model (file inside in_model_dir)
    data_list     : list of training data files
    label_dir     : directory holding the label files
    in_model_dir  : directory containing the prototype model
    out_model_dir : output directory (default: in_model_dir)
    '''
    if out_model_dir is None:
        out_model_dir = in_model_dir
    elif not os.path.exists(out_model_dir):
        os.makedirs(out_model_dir)
    model_file = os.path.join(in_model_dir, model_name)
    # Arguments are passed to MATLAB as a single comma-separated string.
    args = '{model_file},{data_list},{label_dir},{out_model_dir}'.format(
        model_file=add_quotes(model_file),
        data_list=add_quotes(data_list),
        label_dir=add_quotes(label_dir),
        out_model_dir=add_quotes(out_model_dir))
    cmd = ['matlab_batcher.sh', 'initialize_HTK_GMM', args]
    # Fail early if the MATLAB wrapper script is not available.
    assert which.which('matlab_batcher.sh') is not None
    logging.debug(' '.join(cmd))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))
def vad_evaluate_darpa(testing_list=None, ref_annotations_list=None,
                       hyp_annotations_list=None, eval_script=None,
                       audio_dir=None, smp_period=0.01, window_length=0.02,
                       task_id='SAD', results_dir=None):
    '''
    Run the java DARPA evaluation script by calling the relevant
    MATLAB function. The MATLAB function needs to be in MATLAB path.
    A shell script (matlab_batcher.sh) that also needs to be in path
    is called to run the matlab command from the shell.

    Input:
    testing_list : list of audio files (absolute paths)
    ref_annotations_list : list of reference annotations (absolute paths)
    hyp_annotations_list : list of hypothesized annotations (absolute paths)
    eval_script : the java evaluation script
    audio_dir : directory where the audio files lie
    smp_period : frame period (in seconds)
    window_length : frame length (in seconds)
    task_id : an id for the task that is evaluated
    results_dir : directory where the results will be stored
    '''
    evaluation_scp_file = os.path.join(results_dir, 'evaluation.scp')
    test_files = lists.get_contents(testing_list)
    ref_files = lists.get_contents(ref_annotations_list)
    hyp_files = lists.get_contents(hyp_annotations_list)
    # Write one "audio reference hypothesis" triplet per line.
    # NOTE(review): map(None, ...) is the Python 2 zip_longest idiom --
    # shorter lists are padded with None. Kept for behavioral parity.
    # 'with' guarantees the file is closed even if a write fails.
    with open(evaluation_scp_file, 'w') as eval_scp:
        for t, r, h in map(None, test_files, ref_files, hyp_files):
            eval_scp.write('{} {} {}\n'.format(t, r, h))
    darpa_convert_labels_to_ids_file_list(hyp_annotations_list)
    # Arguments are passed to MATLAB as a single comma-separated string.
    args = ('{in_file},{script_path},{audio_dir},{working_dir},'
            '{task_id},{smp_period},{window_length}').format(
        in_file=add_quotes(evaluation_scp_file),
        script_path=add_quotes(eval_script),
        audio_dir=add_quotes(audio_dir),
        working_dir=add_quotes(results_dir),
        task_id=add_quotes(task_id),
        smp_period=smp_period,
        window_length=window_length)
    cmd = ['matlab_batcher.sh', 'FindPercentageFromResultFiles', args]
    # Fail early if the MATLAB wrapper script is not available.
    assert which.which('matlab_batcher.sh') is not None
    logging.debug(' '.join(cmd))
    # This variant captures the script's output and logs it.
    logging.info(subprocess.check_output(cmd, stderr=subprocess.STDOUT))
def create_ergodic_hmm_matlab(n_dims, n_states, feat_type, model_name,
                              covar_type='diag', model_dir=None):
    '''
    Define an ergodic HMM in HTK format by calling the MATLAB function
    define_HTK_HMM via the matlab_batcher.sh shell wrapper, which must
    be in the PATH.

    Input:
    n_dims     : feature vector dimensionality
    n_states   : number of HMM states
    feat_type  : HTK feature kind string
    model_name : name of the model to be created
    covar_type : covariance type (accepted for interface compatibility;
                 not forwarded to MATLAB by this wrapper)
    model_dir  : output directory (default: current working directory)
    '''
    if model_dir is None:
        model_dir = os.getcwd()
    elif not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # Arguments are passed to MATLAB as a single comma-separated string.
    args = '{dims},{n_states},{feat},{name},{dir}'.format(
        dims=n_dims,
        n_states=n_states,
        feat=add_slashes(feat_type),
        name=add_slashes(model_name),
        dir=add_slashes(model_dir))
    cmd = ['matlab_batcher.sh', 'define_HTK_HMM', args]
    # Fail early if the MATLAB wrapper script is not available.
    assert which('matlab_batcher.sh') is not None
    logging.debug(' '.join(cmd))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))
def initialize_gmm_kmeans(model_name, data_list, label_dir, in_model_dir,
                          out_model_dir=None):
    '''
    Initialize an HTK GMM via k-means by calling the MATLAB function
    initialize_HTK_GMM through the matlab_batcher.sh wrapper script.

    Input:
    model_name    : name of the model (file inside in_model_dir)
    data_list     : list of training data files
    label_dir     : directory holding the label files
    in_model_dir  : directory containing the prototype model
    out_model_dir : output directory (default: in_model_dir)
    '''
    if out_model_dir is None:
        out_model_dir = in_model_dir
    elif not os.path.exists(out_model_dir):
        os.makedirs(out_model_dir)
    model_file = os.path.join(in_model_dir, model_name)
    # Arguments are passed to MATLAB as a single comma-separated string.
    args = '{model_file},{data_list},{label_dir},{out_model_dir}'.format(
        model_file=add_quotes(model_file),
        data_list=add_quotes(data_list),
        label_dir=add_quotes(label_dir),
        out_model_dir=add_quotes(out_model_dir))
    cmd = ['matlab_batcher.sh', 'initialize_HTK_GMM', args]
    # Fail early if the MATLAB wrapper script is not available.
    assert which.which('matlab_batcher.sh') is not None
    logging.debug(' '.join(cmd))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))
Description: Train a voice activity detection Language Model. nassos@SAIL ''' import argparse import logging import string import subprocess import os from my_utils import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' ngram_bin = which.which('ngram-count') def ngram_count(args): cmd = args cmd.insert(0, ngram_bin) logging.debug(string.join(cmd, ' ')) logging.info(subprocess.check_output(cmd, stderr=subprocess.STDOUT)) def ldc_annotations_to_symbol_sequences(label_map, ldc_annotations_list, samp_period, out_file): ''' Convert LDC annotations to sequences of symbols that can be given to SRILM to estimate the n-gram models '''
Created on Dec 27, 2011 @author: Nassos Katsamanis, PhD ''' from multiprocessing import Process, cpu_count, Queue import sys import os import itertools import math import subprocess import logging import string from my_utils.which import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' hrest_bin = which('HRest') def grouper(n, iterable, fillvalue=None): #grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx args = [iter(iterable)] * n return itertools.izip_longest(fillvalue=fillvalue, *args) def hrest(args): get_scp_file = False scp_file_found = False already_in_parallel_mode = False scp_file = '' argums = [] for arg in args:
Created on Dec 27, 2011 @author: Nassos Katsamanis, PhD ''' from multiprocessing import Process, cpu_count, Queue import sys import os import itertools import math import subprocess import logging import string from my_utils.which import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' herest_bin = which('HERest') def grouper(n, iterable, fillvalue=None): #grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx args = [iter(iterable)] * n return itertools.izip_longest(fillvalue=fillvalue, *args) def herest(args): get_scp_file = False scp_file_found = False already_in_parallel_mode = False scp_file = '' argums = [] for arg in args: if arg=='-S': get_scp_file = True
''' Created on Dec 28, 2011 @author: nassos ''' import os import subprocess import argparse import textwrap import logging import string from my_utils import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' hinit_bin = which.which('HInit') hcompv_bin = which.which('HCompV') n_iterations = 10 def add_quotes(string_var): string_var = "\'"+string_var+"\'" return string_var def estimate_minimum_variances(feature_file_list, model_file, out_dir, min_var_factor=0.01): ''' Use HCompV to generate the vFloors file that contains the minimum variances allowed for the models to be trained ''' h_cfg_file = os.path.join(out_dir,'hcompv.cfg') h_cfg = open(h_cfg_file,'w') h_cfg.write('MINVARFLOOR = 0.001') h_cfg.close()
Created on Dec 27, 2011 @author: Nassos Katsamanis, PhD ''' from multiprocessing import Process, cpu_count, Queue import sys import os import itertools import math import subprocess import logging import string from my_utils.which import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' herest_bin = which('HERest') def grouper(n, iterable, fillvalue=None): #grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx args = [iter(iterable)] * n return itertools.izip_longest(fillvalue=fillvalue, *args) def herest(args): get_scp_file = False scp_file_found = False already_in_parallel_mode = False scp_file = '' argums = [] for arg in args:
''' Created on Dec 29, 2011 @author: nassos ''' import os from my_utils import which import subprocess os.environ['PATH'] += os.pathsep + '/usr/local/bin' hparse_bin = which.which('HParse') def create_trivial_grammar(labels, grammar_file, mode='sequence'): gr_file = open(grammar_file,'w') gr_file.write('$class = ') gr_file.write(labels[0]) for l in labels[1:]: gr_file.write(' | '+l) gr_file.write(';\n') if mode=='sequence': gr_file.write('(<$class>)') elif mode=='single': gr_file.write('($class)') gr_file.close() def create_trivial_wordnet(labels,wdnet_file, mode='sequence'): grammar_file = wdnet_file+'.gram' create_trivial_grammar(labels,grammar_file, mode) cmd = [hparse_bin,grammar_file,wdnet_file] print(os.getcwd()) print(cmd)
designed to run transparently. @author: Nassos Katsamanis, PhD ''' import itertools import math from multiprocessing import Process, cpu_count import subprocess import sys import os import logging import string from my_utils.which import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' hcopy_bin = which('HCopy') hcopy_par = which('HCopy.pl') assert(os.path.exists(hcopy_bin)) def grouper(n, iterable, fillvalue=None): #grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx args = [iter(iterable)] * n return itertools.izip_longest(fillvalue=fillvalue, *args) def hcopy(args): get_scp_file = False scp_file_found = False scp_file = '' argums = [] for arg in args: if arg=='-S':
designed to run transparently. @author: Nassos Katsamanis, PhD ''' import itertools import math from multiprocessing import Process, cpu_count import subprocess import sys import os import logging import string from my_utils.which import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' hcopy_bin = which('HCopy') hcopy_par = which('HCopy.pl') assert (os.path.exists(hcopy_bin)) def grouper(n, iterable, fillvalue=None): #grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx args = [iter(iterable)] * n return itertools.izip_longest(fillvalue=fillvalue, *args) def hcopy(args): get_scp_file = False scp_file_found = False scp_file = '' argums = []
""" Created on Dec 28, 2011 @author: nassos """ import logging import os import string import subprocess from htk import hrest, herest from my_utils import which from classifiers import initialize_gmm os.environ["PATH"] += os.pathsep + "/usr/local/bin" hhed_bin = which.which("HHEd") def hhed(args): cmd = args cmd.insert(0, hhed_bin) logging.debug(string.join(cmd, " ")) logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT)) def increase_n_components( n_mixes, model_name, model_file, training_list, lab_dir,
Created on Dec 27, 2011 @author: Nassos Katsamanis, PhD ''' from multiprocessing import Process, cpu_count, Queue import sys import os import itertools import math import subprocess import logging import string from my_utils.which import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' hrest_bin = which('HRest') def grouper(n, iterable, fillvalue=None): #grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx args = [iter(iterable)] * n return itertools.izip_longest(fillvalue=fillvalue, *args) def hrest(args): get_scp_file = False scp_file_found = False already_in_parallel_mode = False scp_file = '' argums = [] for arg in args: if arg=='-S': get_scp_file = True
designed to run transparently. @author: Nassos Katsamanis, PhD ''' import itertools import math from multiprocessing import Process, cpu_count import subprocess import sys import os import logging import string from my_utils.which import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' hvite_bin = which('HVite') hvite_par = which('HVite.pl') def concatenate_mlfs(out_mlf, mlfs): mlf_file = open(out_mlf,'w') mlf_file.write('#!MLF!#') for fl in mlfs: if os.path.exists(fl): fl_fid = open(fl,'r') # Ignore the first line fl_fid.readline() for line in fl_fid: mlf_file.write(line) mlf_file.close()
''' Created on Dec 28, 2011 @author: nassos ''' import os import subprocess import argparse import textwrap import logging import string from my_utils import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' hinit_bin = which.which('HInit') hcompv_bin = which.which('HCompV') n_iterations = 10 def add_quotes(string_var): string_var = "\'" + string_var + "\'" return string_var def estimate_minimum_variances(feature_file_list, model_file, out_dir, min_var_factor=0.01): ''' Use HCompV to generate the vFloors file that contains the minimum variances allowed for the models to be trained
Description: Train a voice activity detection Language Model. nassos@SAIL ''' import argparse import logging import string import subprocess import os from my_utils import which os.environ['PATH'] += os.pathsep + '/usr/local/bin' ngram_bin = which.which('ngram-count') def ngram_count(args): cmd = args cmd.insert(0, ngram_bin) logging.debug(string.join(cmd,' ')) logging.info(subprocess.check_output(cmd, stderr=subprocess.STDOUT)) def ldc_annotations_to_symbol_sequences(label_map, ldc_annotations_list, samp_period, out_file): ''' Convert LDC annotations to sequences of symbols that can be given to SRILM to estimate the n-gram models ''' ldc_list = open(ldc_annotations_list,'r') out = open(out_file,'w')
''' Created on Dec 28, 2011 @author: nassos ''' import logging import os import string import subprocess from htk import hrest, herest from my_utils import which from classifiers import initialize_gmm os.environ['PATH'] += os.pathsep + '/usr/local/bin' hhed_bin = which.which('HHEd') def hhed(args): cmd = args cmd.insert(0, hhed_bin) logging.debug(string.join(cmd, ' ')) logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT)) def increase_n_components(n_mixes, model_name, model_file, training_list, lab_dir, target_model_dir,
''' Created on Dec 29, 2011 @author: nassos ''' import os from my_utils import which import subprocess os.environ['PATH'] += os.pathsep + '/usr/local/bin' hparse_bin = which.which('HParse') def create_trivial_grammar(labels, grammar_file, mode='sequence'): gr_file = open(grammar_file, 'w') gr_file.write('$class = ') gr_file.write(labels[0]) for l in labels[1:]: gr_file.write(' | ' + l) gr_file.write(';\n') if mode == 'sequence': gr_file.write('(<$class>)') elif mode == 'single': gr_file.write('($class)') gr_file.close() def create_trivial_wordnet(labels, wdnet_file, mode='sequence'): grammar_file = wdnet_file + '.gram' create_trivial_grammar(labels, grammar_file, mode) cmd = [hparse_bin, grammar_file, wdnet_file]