Example #1
0
def make_sre_data(data_config, save_loc):
    """Build Kaldi data directories and pickled lists for the SRE splits.

    Creates <save_loc>/DATA_DIR and, inside it, one Kaldi data directory per
    split (train, sre2018 dev enroll/test/unlabelled, sre2018 eval
    enroll/test), then pickles the raw lists for later pipeline stages.

    Args:
        data_config: Configuration object forwarded to the dataset builders.
        save_loc: Root save directory (made absolute before use).

    Returns:
        Tuple (train_data, sre_unlabelled, sre_dev_enroll, sre_dev_test,
        sre_eval_enroll, sre_eval_test).
    """
    data_loc = join_path(abspath(save_loc), DATA_DIR)
    make_directory(data_loc)

    # Training split.
    train_data = get_train_data(data_config)
    make_kaldi_data_dir(train_data, join_path(data_loc, 'train_data'))
    print('Made {:d} files for training.'.format(train_data.shape[0]))

    # SRE 2018 development splits.
    sre_dev_enroll, sre_dev_test, sre_unlabelled = make_sre18_dev_data(
        data_config)
    print(
        'Made {:d} enroll, {:d} test and {:d} unlabeled from sre2018 dev files.'
        .format(sre_dev_enroll.shape[0], sre_dev_test.shape[0],
                sre_unlabelled.shape[0]))
    for split_name, split_data in [('sre_dev_enroll', sre_dev_enroll),
                                   ('sre_dev_test', sre_dev_test),
                                   ('sre_unlabelled', sre_unlabelled)]:
        make_kaldi_data_dir(split_data, join_path(data_loc, split_name))

    # SRE 2018 evaluation splits.
    sre_eval_enroll, sre_eval_test = make_sre18_eval_data(data_config)
    make_kaldi_data_dir(sre_eval_enroll, join_path(data_loc,
                                                   'sre_eval_enroll'))
    make_kaldi_data_dir(sre_eval_test, join_path(data_loc, 'sre_eval_test'))
    print('Made {:d} enroll and {:d} test from sre2018 eval files.'.format(
        sre_eval_enroll.shape[0], sre_eval_test.shape[0]))

    # Persist the raw lists so later stages don't have to rebuild them.
    print('Saving data lists..')
    save_object(join_path(data_loc, 'train_data.pkl'), train_data)
    save_object(join_path(data_loc, 'sre_unlabelled.pkl'), sre_unlabelled)
    save_object(join_path(data_loc, 'sre_dev.pkl'),
                (sre_dev_enroll, sre_dev_test))
    save_object(join_path(data_loc, 'sre_eval.pkl'),
                (sre_eval_enroll, sre_eval_test))
    print('Data lists saved at: {}'.format(data_loc))

    return (train_data, sre_unlabelled, sre_dev_enroll, sre_dev_test,
            sre_eval_enroll, sre_eval_test)
def make_kaldi_data_dir(args_list, data_loc):
    """Write the Kaldi wav.scp, spk2utt and utt2spk files for one split.

    Args:
        args_list: 2-D array of utterance rows; column 0 looks like the
            utterance id, column 3 the speaker id and column 4 the wav
            location — TODO confirm against the list builders.
        data_loc: Output directory for the Kaldi files (created if missing).
    """
    make_directory(data_loc)
    # run_command('cd {} && mv * .backup/'.format(data_loc))
    sorted_list = sort_by_index(args_list)
    utt_ids = sorted_list[:, 0]
    speakers = sorted_list[:, 3]
    make_wav_scp(utt_ids, sorted_list[:, 4], join_path(data_loc, 'wav.scp'))
    make_spk_to_utt(utt_ids, speakers, join_path(data_loc, 'spk2utt'))
    make_utt_to_spk(utt_ids, speakers, join_path(data_loc, 'utt2spk'))
Example #3
0
def get_log_path(iteration, model_tag, save_loc, worker_id=None, operation='train'):
    """Return the log file path for an operation/iteration, creating the log dir.

    The file name is '<operation>.<iteration>[.<worker_id>].log' under
    <save_loc>/LOGS_DIR/<model_tag>.
    """
    log_loc = join_path(save_loc, '{}/{}'.format(LOGS_DIR, model_tag))
    make_directory(log_loc)
    # Worker id is an optional middle component of the file name.
    name_parts = [operation, str(iteration)]
    if worker_id is not None:
        name_parts.append(str(worker_id))
    return join_path(log_loc, '.'.join(name_parts) + '.log')
Example #4
0
def get_model_path(iteration, model_tag, save_loc, worker_id=None):
    """Return (and create) the model directory for a training iteration.

    The directory is <save_loc>/MODELS_DIR/<model_tag>/iteration_<iteration>,
    with a '_<worker_id>' suffix when a worker id is given.
    """
    model_loc = join_path(save_loc, '{}/{}'.format(MODELS_DIR, model_tag))
    dir_name = 'iteration_{}'.format(iteration)
    if worker_id is not None:
        dir_name = '{}_{}'.format(dir_name, worker_id)
    model_path = join_path(model_loc, dir_name)
    make_directory(model_path)
    return model_path
                    default=NUM_FEATURES,
                    help='Dimension of input features.')
parser.add_argument('--num-jobs',
                    type=int,
                    default=NUM_CPU_WORKERS,
                    help='Number of CPU Workers')
parser.add_argument('--save', default=SAVE_LOC, help='Save location.')
parser.add_argument('--worker-id', type=int, default=0, help='Worker Id')
args = parser.parse_args()

print_script_args(sys.argv)
print('Started at: {}\n'.format(get_time_stamp()))

# Per-model temporary working directory under the save root.
save_loc = abspath(args.save)
tmp_loc = join_path(save_loc, '{}/{}'.format(TMP_DIR, args.model_tag))
make_directory(tmp_loc)

# egs scp for this shard; read_scp is a template whose trailing '{}' is
# filled in with a batch id later (see get_batch below).
egs_scp = join_path(save_loc, '{}/egs.{}.scp'.format(EGS_DIR, args.egs_index))
read_scp = join_path(tmp_loc, 'read_egs.{}.'.format(args.worker_id) + '{}.scp')

# Previous iteration's model is the starting point; this worker writes its
# own per-worker directory for the current iteration.
initial_path = get_model_path(args.iteration - 1, args.model_tag, save_loc)
model_path = get_model_path(args.iteration, args.model_tag, save_loc,
                            args.worker_id)


def get_batch(items):
    batch_list, batch_id = items
    labels = []
    scp_file = read_scp.format(batch_id)
    with open(scp_file, 'w') as f:
        for utt, ark, l in batch_list:
                    default=8000,
                    help='Sampling Rate')
# Save root defaults to a path relative to the scripts directory.
parser.add_argument('--save', default='../save', help='Save Location')
args = parser.parse_args()


def make_feats(mfcc, split, save_loc):
    """Extract VAD-filtered, normalized features for a single data split.

    Args:
        mfcc: MFCC extractor instance.
        split: Split name; also the subdirectory name under DATA_DIR.
        save_loc: Root save directory.
    """
    base_loc = join_path(save_loc, DATA_DIR)
    mfcc.extract_with_vad_and_normalization(join_path(base_loc, split), split)


if __name__ == '__main__':
    # Resolve the save root and ensure feature/VAD output directories exist.
    args.save = abspath(args.save)
    mfcc_loc = join_path(args.save, MFCC_DIR)
    vad_loc = join_path(args.save, VAD_DIR)
    make_directory(mfcc_loc)
    make_directory(vad_loc)
    # MFCC front end; fl/fh presumably the band edges in Hz for telephone
    # speech — TODO confirm against the MFCC class.
    mfcc_ = MFCC(fs=args.sample_rate,
                 fl=20,
                 fh=3700,
                 frame_len_ms=25,
                 n_ceps=args.num_features,
                 n_jobs=args.num_jobs,
                 save_loc=args.save)
    # Extract features for every dataset split in turn.
    for split_ in [
            'train_data', 'sre_unlabelled', 'sre_dev_enroll', 'sre_dev_test',
            'sre_eval_enroll', 'sre_eval_test'
    ]:
        print('Making features for {}..'.format(split_))
        make_feats(mfcc_, split_, args.save)
        print('Finished making features for {}..'.format(split_))
Example #7
0
                    help='Dimension of input features')
parser.add_argument('--ps', help='Parameter Server(s)')
parser.add_argument('--save', default='../save', help='Save Location')
parser.add_argument('--steps',
                    type=int,
                    default=500000,
                    help='Total global steps')
parser.add_argument('--type', default='ps', help='Instance Type')
parser.add_argument('--task-index', type=int, default=0, help='Task Index')
parser.add_argument('--workers', help='Worker Nodes')
args = parser.parse_args()

# Directory layout: egs under EGS_DIR, models under MODELS_DIR/<model_tag>.
save_loc = abspath(args.save)
egs_loc = join(save_loc, EGS_DIR)
model_loc = join(save_loc, MODELS_DIR)
make_directory(model_loc)
model_loc = join(model_loc, args.model_tag)
make_directory(model_loc)

# Presence of this file presumably signals early stopping — TODO confirm.
early_stop_file = join(model_loc, 'early_stop')

tmp_loc = join(save_loc, TMP_DIR)
make_directory(tmp_loc)

# Cluster spec: comma-separated host:port lists for PS and worker nodes.
ps = args.ps.split(',')
workers = args.workers.split(',')
num_workers = len(workers)

# Let TensorFlow grow GPU memory on demand instead of grabbing it all upfront.
use_gpu(args.gpu)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
Example #8
0
from os.path import abspath, join as join_path

from constants.app_constants import LOGS_DIR
from services.common import make_directory
from services.distributed import append_ps_and_workers, assign_nodes, make_parameter_servers, make_workers, submit_jobs

import argparse as ap

parser = ap.ArgumentParser()
parser.add_argument('--cmd', default='python -u async_train.py', help='Python script command. Eg: python -u train.py --epochs 10')
parser.add_argument('--model-tag', default='HGRU', help='Model Tag')
parser.add_argument('--num-ps', type=int, default=1, help='Number of Parameter Servers')
parser.add_argument('--num-workers', type=int, default=3, help='Number of Workers')
parser.add_argument('--save', default='../save', help='Save Location')
parser.add_argument('--start-port', type=int, default=7770, help='Starting port value')

args = parser.parse_args()
save_loc = abspath(args.save)
log_loc = join_path(save_loc, LOGS_DIR)
make_directory(log_loc)

# Allocate host:port slots for each node, append the cluster spec to the
# training command, then build one job entry per PS/worker node.
print('Making {} Parameter Server(s) and {} Worker Nodes...'.format(args.num_ps, args.num_workers))
ps, workers = assign_nodes(args.num_ps, args.num_workers, args.start_port)
cmd = append_ps_and_workers(args.cmd, ps, workers)
ps_list = make_parameter_servers(ps, cmd, args.model_tag, log_loc)
workers_list = make_workers(workers, cmd, args.model_tag, log_loc)
jobs_list = ps_list + workers_list
# Submit everything; per-node logs are written under log_loc.
print('Submitting jobs to parameter server(s) and worker nodes...')
submit_jobs(jobs_list)
print('Finished.')
                    help='Number of MFCC Co-efficients')
# Fix: the --num-classes help string was a copy-paste of the MFCC option
# ('Number of MFCC Co-efficients'); it now describes the actual argument.
parser.add_argument('--num-classes',
                    type=int,
                    default=3769,
                    help='Number of Classes')
parser.add_argument('--num-workers',
                    type=int,
                    default=10,
                    help='Number of Workers')
parser.add_argument('--save', default='../save', help='Save Location')
parser.add_argument('--worker-id', type=int, default=0, help='Worker Id')
args = parser.parse_args()

# Per-model embedding output and temporary working directories.
save_loc = abspath(args.save)
embedding_loc = join_path(save_loc, '{}/{}'.format(EMB_DIR, args.model_tag))
make_directory(embedding_loc)

tmp_loc = join_path(save_loc, '{}/{}'.format(TMP_DIR, args.model_tag))
make_directory(tmp_loc)

# Per-batch scp template: the trailing '{}' is filled with the batch id
# by get_batch below.
read_scp = join_path(tmp_loc,
                     'extract_read.{}'.format(args.worker_id) + '.{}.scp')


def get_batch(items):
    utt, ark, batch_id = items
    scp_file = read_scp.format(batch_id)

    with open(scp_file, 'w') as scp:
        scp.write('{} {}'.format(utt, ark))