Ejemplo n.º 1
0
def hcopy_parallel(scp_file, args):
    '''
    Run HCopy in parallel on the files listed in the given script file.

    Check whether we are on a cluster with a scheduler system installed
    or we are just on a single machine and dispatch accordingly.

    Input:
    scp_file : HTK script (.scp) file listing the data to process
    args : additional arguments forwarded to HCopy
    '''
    # Read the list of data and split into multiple chunks.
    # 'which' returns None when qsub is not in PATH, i.e., no scheduler
    # is installed; use identity comparison for the None singleton.
    if which('qsub') is None:
        hcopy_parallel_multicore(scp_file, args)
    else:
        hcopy_parallel_cluster(scp_file, args)
Ejemplo n.º 2
0
def hcopy_parallel(scp_file, args):
    '''
    Run HCopy in parallel on the files listed in the given script file.

    Check whether we are on a cluster with a scheduler system installed
    or we are just on a single machine and dispatch accordingly.

    Input:
    scp_file : HTK script (.scp) file listing the data to process
    args : additional arguments forwarded to HCopy
    '''
    # Read the list of data and split into multiple chunks.
    # 'which' returns None when qsub is not in PATH; compare to the
    # None singleton with 'is', not '=='.
    if which('qsub') is None:
        hcopy_parallel_multicore(scp_file, args)
    else:
        hcopy_parallel_cluster(scp_file, args)
Ejemplo n.º 3
0
def create_ergodic_hmm_matlab(n_dims, n_states, feat_type, model_name, covar_type='diag', model_dir=None):
    '''
    Define an ergodic HTK HMM by calling the MATLAB function define_HTK_HMM
    through the matlab_batcher.sh shell script (which must be in PATH).

    Input:
    n_dims : feature vector dimensionality
    n_states : number of HMM states
    feat_type : feature type string passed to the MATLAB function
    model_name : name of the model to be created
    covar_type : covariance type; NOTE(review): not forwarded to MATLAB here
    model_dir : directory where the model is written (default: cwd)
    '''
    if model_dir is None:
        model_dir = os.getcwd()
    elif not os.path.exists(model_dir):
        os.makedirs(model_dir)
    args = '{dims},{n_states},{feat},{name},{dir}'.format(dims=n_dims, n_states=n_states,
                                                          feat=add_slashes(feat_type),
                                                          name=add_slashes(model_name),
                                                          dir=add_slashes(model_dir))
    cmd = ['matlab_batcher.sh', 'define_HTK_HMM', args]
    # Fail early if the batcher script is not reachable in PATH.
    assert which('matlab_batcher.sh') is not None
    logging.debug(string.join(cmd, ' '))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))
Ejemplo n.º 4
0
def vad_evaluate_darpa(testing_list=None,
                       ref_annotations_list=None,
                       hyp_annotations_list=None,
                       eval_script=None,
                       audio_dir=None,
                       smp_period=0.01,
                       window_length=0.02,
                       task_id='SAD',
                       results_dir=None):
    '''
    Run the java DARPA evaluation script by calling the relevant MATLAB function. The
    MATLAB function needs to be in MATLAB path. A shell script (matlab_batcher.sh) that
    also needs to be in path is called to run the matlab command from the shell.

    Input:
    testing_list : list of audio files (absolute paths)
    ref_annotations_list : list of reference annotations (absolute paths)
    hyp_annotations_list : list of hypothesized annotations (absolute paths)
    eval_script : the java evaluation script
    audio_dir : directory where the audio files lie
    smp_period : frame period (in seconds)
    window_length : frame length (in seconds)
    task_id : an id for the task that is evaluated
    results_dir : directory where the results will be stored
    '''
    evaluation_scp_file = os.path.join(results_dir, 'evaluation.scp')
    test_files = lists.get_contents(testing_list)
    ref_files = lists.get_contents(ref_annotations_list)
    hyp_files = lists.get_contents(hyp_annotations_list)
    # map(None, ...) is the Python 2 padded-zip idiom: triples are padded
    # with None when the three lists have unequal lengths.
    # 'with' guarantees the scp file is closed (and flushed) even if a
    # write fails, before the evaluation command below reads it.
    with open(evaluation_scp_file, 'w') as eval_scp:
        for t, r, h in map(None, test_files, ref_files, hyp_files):
            eval_scp.write('{} {} {}\n'.format(t, r, h))

    darpa_convert_labels_to_ids_file_list(hyp_annotations_list)

    args = '{in_file},{script_path},{audio_dir},{working_dir},{task_id},{smp_period},{window_length}'.format(
        in_file=add_quotes(evaluation_scp_file),
        script_path=add_quotes(eval_script),
        audio_dir=add_quotes(audio_dir),
        working_dir=add_quotes(results_dir),
        task_id=add_quotes(task_id),
        smp_period=smp_period,
        window_length=window_length)
    cmd = ['matlab_batcher.sh', 'FindPercentageFromResultFiles', args]
    # Fail early if the batcher script is not reachable in PATH.
    assert which.which('matlab_batcher.sh') is not None
    logging.debug(string.join(cmd, ' '))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))
Ejemplo n.º 5
0
def initialize_gmm_kmeans(model_name, data_list, label_dir, in_model_dir, out_model_dir=None):
    '''
    Initialize an HTK GMM via k-means by calling the MATLAB function
    initialize_HTK_GMM through the matlab_batcher.sh shell script
    (which must be in PATH).

    Input:
    model_name : name of the model file inside in_model_dir
    data_list : file listing the training data
    label_dir : directory holding the corresponding label files
    in_model_dir : directory of the input (prototype) model
    out_model_dir : directory for the initialized model (default: in_model_dir)
    '''
    if out_model_dir is None:
        out_model_dir = in_model_dir
    elif not os.path.exists(out_model_dir):
        os.makedirs(out_model_dir)

    model_file = os.path.join(in_model_dir, model_name)
    args = '{model_file},{data_list},{label_dir},{out_model_dir}'.format(model_file=add_quotes(model_file),
                                                                         data_list=add_quotes(data_list),
                                                                         label_dir=add_quotes(label_dir),
                                                                         out_model_dir=add_quotes(out_model_dir))
    cmd = ['matlab_batcher.sh', 'initialize_HTK_GMM', args]
    # Fail early if the batcher script is not reachable in PATH.
    assert which.which('matlab_batcher.sh') is not None
    logging.debug(string.join(cmd, ' '))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))
def vad_evaluate_darpa(testing_list=None, ref_annotations_list=None, hyp_annotations_list=None,
                       eval_script=None, audio_dir=None, smp_period=0.01, window_length=0.02, 
                       task_id='SAD', results_dir=None):
    '''
    Run the java DARPA evaluation script by calling the relevant MATLAB function. The 
    MATLAB function needs to be in MATLAB path. A shell script (matlab_batcher.sh) that 
    also needs to be in path is called to run the matlab command from the shell.

    Input:
    testing_list : list of audio files (absolute paths)
    ref_annotations_list : list of reference annotations (absolute paths)
    hyp_annotations_list : list of hypothesized annotations (absolute paths)
    eval_script : the java evaluation script
    audio_dir : directory where the audio files lie
    smp_period : frame period (in seconds)
    window_length : frame length (in seconds)
    task_id : an id for the task that is evaluated
    results_dir : directory where the results will be stored
    '''
    evaluation_scp_file = os.path.join(results_dir, 'evaluation.scp')
    test_files = lists.get_contents(testing_list)
    ref_files = lists.get_contents(ref_annotations_list)
    hyp_files = lists.get_contents(hyp_annotations_list)
    # map(None, ...) is the Python 2 padded-zip idiom: triples are padded
    # with None when the three lists have unequal lengths.
    # 'with' guarantees the scp file is closed (and flushed) even if a
    # write fails, before the evaluation command below reads it.
    with open(evaluation_scp_file, 'w') as eval_scp:
        for t, r, h in map(None, test_files, ref_files, hyp_files):
            eval_scp.write('{} {} {}\n'.format(t, r, h))

    darpa_convert_labels_to_ids_file_list(hyp_annotations_list)

    args = '{in_file},{script_path},{audio_dir},{working_dir},{task_id},{smp_period},{window_length}'.format(
        in_file=add_quotes(evaluation_scp_file),
        script_path=add_quotes(eval_script),
        audio_dir=add_quotes(audio_dir),
        working_dir=add_quotes(results_dir),
        task_id=add_quotes(task_id),
        smp_period=smp_period,
        window_length=window_length)
    cmd = ['matlab_batcher.sh', 'FindPercentageFromResultFiles', args]
    # Fail early if the batcher script is not reachable in PATH.
    assert which.which('matlab_batcher.sh') is not None
    logging.debug(string.join(cmd, ' '))
    logging.info(subprocess.check_output(cmd, stderr=subprocess.STDOUT))
Ejemplo n.º 7
0
def create_ergodic_hmm_matlab(n_dims,
                              n_states,
                              feat_type,
                              model_name,
                              covar_type='diag',
                              model_dir=None):
    '''
    Define an ergodic HTK HMM by calling the MATLAB function define_HTK_HMM
    through the matlab_batcher.sh shell script (which must be in PATH).

    Input:
    n_dims : feature vector dimensionality
    n_states : number of HMM states
    feat_type : feature type string passed to the MATLAB function
    model_name : name of the model to be created
    covar_type : covariance type; NOTE(review): not forwarded to MATLAB here
    model_dir : directory where the model is written (default: cwd)
    '''
    if model_dir is None:
        model_dir = os.getcwd()
    elif not os.path.exists(model_dir):
        os.makedirs(model_dir)
    args = '{dims},{n_states},{feat},{name},{dir}'.format(
        dims=n_dims,
        n_states=n_states,
        feat=add_slashes(feat_type),
        name=add_slashes(model_name),
        dir=add_slashes(model_dir))
    cmd = ['matlab_batcher.sh', 'define_HTK_HMM', args]
    # Fail early if the batcher script is not reachable in PATH.
    assert which('matlab_batcher.sh') is not None
    logging.debug(string.join(cmd, ' '))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))
Ejemplo n.º 8
0
def initialize_gmm_kmeans(model_name,
                          data_list,
                          label_dir,
                          in_model_dir,
                          out_model_dir=None):
    '''
    Initialize an HTK GMM via k-means by calling the MATLAB function
    initialize_HTK_GMM through the matlab_batcher.sh shell script
    (which must be in PATH).

    Input:
    model_name : name of the model file inside in_model_dir
    data_list : file listing the training data
    label_dir : directory holding the corresponding label files
    in_model_dir : directory of the input (prototype) model
    out_model_dir : directory for the initialized model (default: in_model_dir)
    '''
    if out_model_dir is None:
        out_model_dir = in_model_dir
    elif not os.path.exists(out_model_dir):
        os.makedirs(out_model_dir)

    model_file = os.path.join(in_model_dir, model_name)
    args = '{model_file},{data_list},{label_dir},{out_model_dir}'.format(
        model_file=add_quotes(model_file),
        data_list=add_quotes(data_list),
        label_dir=add_quotes(label_dir),
        out_model_dir=add_quotes(out_model_dir))
    cmd = ['matlab_batcher.sh', 'initialize_HTK_GMM', args]
    # Fail early if the batcher script is not reachable in PATH.
    assert which.which('matlab_batcher.sh') is not None
    logging.debug(string.join(cmd, ' '))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))
Ejemplo n.º 9
0
Description:
  Train a voice activity detection Language Model.

nassos@SAIL
'''
import argparse
import logging
import string
import subprocess
import os

from my_utils import which

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
ngram_bin = which.which('ngram-count')


def ngram_count(args):
    '''
    Run the SRILM ngram-count tool with the given argument list and log
    its output.

    Input:
    args : list of command-line arguments for ngram-count
    '''
    # Build a fresh command list instead of inserting into the caller's
    # list, so the caller's 'args' is not mutated as a side effect.
    cmd = [ngram_bin] + list(args)
    logging.debug(string.join(cmd, ' '))
    logging.info(subprocess.check_output(cmd, stderr=subprocess.STDOUT))


def ldc_annotations_to_symbol_sequences(label_map, ldc_annotations_list,
                                        samp_period, out_file):
    '''
    Convert LDC annotations to sequences of symbols that can be given to SRILM to estimate the 
    n-gram models
    '''
Ejemplo n.º 10
0
Created on Dec 27, 2011

@author: Nassos Katsamanis, PhD
'''
from multiprocessing import Process, cpu_count, Queue
import sys
import os
import itertools
import math
import subprocess
import logging
import string
from my_utils.which import which

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
hrest_bin = which('HRest')


def grouper(n, iterable, fillvalue=None):
    '''
    Collect the iterable into non-overlapping chunks of length n, padding
    the last chunk with fillvalue: grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx.
    '''
    # n references to one shared iterator make izip_longest pull
    # consecutive elements into each n-tuple.
    chunks = [iter(iterable)] * n
    return itertools.izip_longest(fillvalue=fillvalue, *chunks)


def hrest(args):
    get_scp_file = False
    scp_file_found = False
    already_in_parallel_mode = False
    scp_file = ''
    argums = []
    for arg in args:
Ejemplo n.º 11
0
Created on Dec 27, 2011

@author: Nassos Katsamanis, PhD
'''
from multiprocessing import Process, cpu_count, Queue
import sys
import os
import itertools
import math
import subprocess
import logging
import string
from my_utils.which import which

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
herest_bin = which('HERest')

def grouper(n, iterable, fillvalue=None):
    '''
    Collect the iterable into non-overlapping chunks of length n, padding
    the last chunk with fillvalue: grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx.
    '''
    # n references to one shared iterator make izip_longest pull
    # consecutive elements into each n-tuple.
    chunks = [iter(iterable)] * n
    return itertools.izip_longest(fillvalue=fillvalue, *chunks)

def herest(args):
    get_scp_file = False
    scp_file_found = False
    already_in_parallel_mode = False
    scp_file = ''
    argums = []
    for arg in args:
        if arg=='-S':
            get_scp_file = True
Ejemplo n.º 12
0
'''
Created on Dec 28, 2011

@author: nassos
'''
import os
import subprocess
import argparse
import textwrap
import logging
import string
from my_utils import which
os.environ['PATH'] += os.pathsep + '/usr/local/bin'
hinit_bin = which.which('HInit')
hcompv_bin = which.which('HCompV')

n_iterations = 10

def add_quotes(string_var):
    '''
    Return the input string wrapped in single quotes, as needed when
    passing string arguments to a MATLAB command line.
    '''
    return "'" + string_var + "'"

def estimate_minimum_variances(feature_file_list, model_file, out_dir, min_var_factor=0.01):
    '''
    Use HCompV to generate the vFloors file that contains the minimum variances
    allowed for the models to be trained
    '''
    h_cfg_file = os.path.join(out_dir,'hcompv.cfg')
    h_cfg = open(h_cfg_file,'w')
    h_cfg.write('MINVARFLOOR = 0.001')
    h_cfg.close()
Ejemplo n.º 13
0
Created on Dec 27, 2011

@author: Nassos Katsamanis, PhD
'''
from multiprocessing import Process, cpu_count, Queue
import sys
import os
import itertools
import math
import subprocess
import logging
import string
from my_utils.which import which

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
herest_bin = which('HERest')


def grouper(n, iterable, fillvalue=None):
    '''
    Collect the iterable into non-overlapping chunks of length n, padding
    the last chunk with fillvalue: grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx.
    '''
    # n references to one shared iterator make izip_longest pull
    # consecutive elements into each n-tuple.
    chunks = [iter(iterable)] * n
    return itertools.izip_longest(fillvalue=fillvalue, *chunks)


def herest(args):
    get_scp_file = False
    scp_file_found = False
    already_in_parallel_mode = False
    scp_file = ''
    argums = []
    for arg in args:
'''
Created on Dec 29, 2011

@author: nassos
'''
import os
from my_utils import which
import subprocess

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
hparse_bin = which.which('HParse')

def create_trivial_grammar(labels, grammar_file, mode='sequence'):
    '''
    Write a trivial HTK grammar over the given class labels to grammar_file.

    $class is defined as the alternation of all labels; depending on mode,
    the grammar then accepts either a repetition of class labels
    ('sequence') or a single class label ('single').
    '''
    out = open(grammar_file, 'w')
    # Emit e.g. "$class = a | b | c;" -- the first label has no separator.
    out.write('$class = ')
    out.write(labels[0])
    for alternative in labels[1:]:
        out.write(' | ' + alternative)
    out.write(';\n')
    if mode == 'sequence':
        # <$class> denotes repetition in HTK grammar notation.
        out.write('(<$class>)')
    elif mode == 'single':
        out.write('($class)')
    out.close()

def create_trivial_wordnet(labels,wdnet_file, mode='sequence'):
    grammar_file = wdnet_file+'.gram'
    create_trivial_grammar(labels,grammar_file, mode)
    cmd = [hparse_bin,grammar_file,wdnet_file]
    print(os.getcwd())
    print(cmd)
Ejemplo n.º 15
0
designed to run transparently.

@author: Nassos Katsamanis, PhD
'''
import itertools
import math
from multiprocessing import Process, cpu_count
import subprocess
import sys
import os
import logging
import string
from my_utils.which import which

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
hcopy_bin = which('HCopy')
hcopy_par = which('HCopy.pl')
assert(os.path.exists(hcopy_bin))

def grouper(n, iterable, fillvalue=None):
    '''
    Collect the iterable into non-overlapping chunks of length n, padding
    the last chunk with fillvalue: grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx.
    '''
    # n references to one shared iterator make izip_longest pull
    # consecutive elements into each n-tuple.
    chunks = [iter(iterable)] * n
    return itertools.izip_longest(fillvalue=fillvalue, *chunks)

def hcopy(args):
    get_scp_file = False
    scp_file_found = False
    scp_file = ''
    argums = []
    for arg in args:
        if arg=='-S':
Ejemplo n.º 16
0
designed to run transparently.

@author: Nassos Katsamanis, PhD
'''
import itertools
import math
from multiprocessing import Process, cpu_count
import subprocess
import sys
import os
import logging
import string
from my_utils.which import which

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
hcopy_bin = which('HCopy')
hcopy_par = which('HCopy.pl')
assert (os.path.exists(hcopy_bin))


def grouper(n, iterable, fillvalue=None):
    '''
    Collect the iterable into non-overlapping chunks of length n, padding
    the last chunk with fillvalue: grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx.
    '''
    # n references to one shared iterator make izip_longest pull
    # consecutive elements into each n-tuple.
    chunks = [iter(iterable)] * n
    return itertools.izip_longest(fillvalue=fillvalue, *chunks)


def hcopy(args):
    get_scp_file = False
    scp_file_found = False
    scp_file = ''
    argums = []
Ejemplo n.º 17
0
"""
Created on Dec 28, 2011

@author: nassos
"""
import logging
import os
import string
import subprocess

from htk import hrest, herest
from my_utils import which
from classifiers import initialize_gmm

os.environ["PATH"] += os.pathsep + "/usr/local/bin"
hhed_bin = which.which("HHEd")


def hhed(args):
    """
    Run the HTK HHEd tool with the given argument list and log its output.

    Input:
    args : list of command-line arguments for HHEd
    """
    # Build a fresh command list instead of inserting into the caller's
    # list, so the caller's 'args' is not mutated as a side effect.
    cmd = [hhed_bin] + list(args)
    logging.debug(string.join(cmd, " "))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))


def increase_n_components(
    n_mixes,
    model_name,
    model_file,
    training_list,
    lab_dir,
Ejemplo n.º 18
0
Created on Dec 27, 2011

@author: Nassos Katsamanis, PhD
'''
from multiprocessing import Process, cpu_count, Queue
import sys
import os
import itertools
import math
import subprocess
import logging
import string
from my_utils.which import which

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
hrest_bin = which('HRest')

def grouper(n, iterable, fillvalue=None):
    '''
    Collect the iterable into non-overlapping chunks of length n, padding
    the last chunk with fillvalue: grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx.
    '''
    # n references to one shared iterator make izip_longest pull
    # consecutive elements into each n-tuple.
    chunks = [iter(iterable)] * n
    return itertools.izip_longest(fillvalue=fillvalue, *chunks)

def hrest(args):
    get_scp_file = False
    scp_file_found = False
    already_in_parallel_mode = False
    scp_file = ''
    argums = []
    for arg in args:
        if arg=='-S':
            get_scp_file = True
Ejemplo n.º 19
0
designed to run transparently.

@author: Nassos Katsamanis, PhD
'''
import itertools
import math
from multiprocessing import Process, cpu_count
import subprocess
import sys
import os
import logging
import string
from my_utils.which import which

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
hvite_bin = which('HVite')
hvite_par = which('HVite.pl')

def concatenate_mlfs(out_mlf, mlfs):
    '''
    Concatenate a set of HTK master label files (MLFs) into a single MLF.

    The '#!MLF!#' header line of each input file is skipped and a single
    header is written at the top of the output. Input files that do not
    exist are silently ignored.

    Input:
    out_mlf : path of the concatenated MLF to be written
    mlfs : iterable of paths of the MLFs to concatenate
    '''
    mlf_file = open(out_mlf, 'w')
    mlf_file.write('#!MLF!#')
    for fl in mlfs:
        if os.path.exists(fl):
            fl_fid = open(fl, 'r')
            # Ignore the first line (the per-file '#!MLF!#' header).
            fl_fid.readline()
            for line in fl_fid:
                mlf_file.write(line)
            # Close each input file; the original leaked these handles.
            fl_fid.close()

    mlf_file.close()
Ejemplo n.º 20
0
'''
Created on Dec 28, 2011

@author: nassos
'''
import os
import subprocess
import argparse
import textwrap
import logging
import string
from my_utils import which
os.environ['PATH'] += os.pathsep + '/usr/local/bin'
hinit_bin = which.which('HInit')
hcompv_bin = which.which('HCompV')

n_iterations = 10


def add_quotes(string_var):
    '''
    Return the input string wrapped in single quotes, as needed when
    passing string arguments to a MATLAB command line.
    '''
    return "'" + string_var + "'"


def estimate_minimum_variances(feature_file_list,
                               model_file,
                               out_dir,
                               min_var_factor=0.01):
    '''
    Use HCompV to generate the vFloors file that contains the minimum variances
    allowed for the models to be trained
Description:
  Train a voice activity detection Language Model.

nassos@SAIL
'''
import argparse
import logging
import string
import subprocess
import os

from my_utils import which

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
ngram_bin = which.which('ngram-count')

def ngram_count(args):
    '''
    Run the SRILM ngram-count tool with the given argument list and log
    its output.

    Input:
    args : list of command-line arguments for ngram-count
    '''
    # Build a fresh command list instead of inserting into the caller's
    # list, so the caller's 'args' is not mutated as a side effect.
    cmd = [ngram_bin] + list(args)
    logging.debug(string.join(cmd, ' '))
    logging.info(subprocess.check_output(cmd, stderr=subprocess.STDOUT))

def ldc_annotations_to_symbol_sequences(label_map, ldc_annotations_list, samp_period, out_file):
    '''
    Convert LDC annotations to sequences of symbols that can be given to SRILM to estimate the 
    n-gram models
    '''
    ldc_list = open(ldc_annotations_list,'r')

    out = open(out_file,'w')
Ejemplo n.º 22
0
'''
Created on Dec 28, 2011

@author: nassos
'''
import logging
import os
import string
import subprocess

from htk import hrest, herest
from my_utils import which
from classifiers import initialize_gmm

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
hhed_bin = which.which('HHEd')


def hhed(args):
    '''
    Run the HTK HHEd tool with the given argument list and log its output.

    Input:
    args : list of command-line arguments for HHEd
    '''
    # Build a fresh command list instead of inserting into the caller's
    # list, so the caller's 'args' is not mutated as a side effect.
    cmd = [hhed_bin] + list(args)
    logging.debug(string.join(cmd, ' '))
    logging.info(subprocess.check_call(cmd, stderr=subprocess.STDOUT))


def increase_n_components(n_mixes,
                          model_name,
                          model_file,
                          training_list,
                          lab_dir,
                          target_model_dir,
Ejemplo n.º 23
0
'''
Created on Dec 29, 2011

@author: nassos
'''
import os
from my_utils import which
import subprocess

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
hparse_bin = which.which('HParse')


def create_trivial_grammar(labels, grammar_file, mode='sequence'):
    '''
    Write a trivial HTK grammar over the given class labels to grammar_file.

    $class is defined as the alternation of all labels; depending on mode,
    the grammar then accepts either a repetition of class labels
    ('sequence') or a single class label ('single').
    '''
    out = open(grammar_file, 'w')
    # Emit e.g. "$class = a | b | c;" -- the first label has no separator.
    out.write('$class = ')
    out.write(labels[0])
    for alternative in labels[1:]:
        out.write(' | ' + alternative)
    out.write(';\n')
    if mode == 'sequence':
        # <$class> denotes repetition in HTK grammar notation.
        out.write('(<$class>)')
    elif mode == 'single':
        out.write('($class)')
    out.close()


def create_trivial_wordnet(labels, wdnet_file, mode='sequence'):
    grammar_file = wdnet_file + '.gram'
    create_trivial_grammar(labels, grammar_file, mode)
    cmd = [hparse_bin, grammar_file, wdnet_file]