def predict(model, x, offsets, num_classes, network, gpu, winlen, timedelay,
            ft, progress=True):
    """Compute per-frame log-posteriors for the given data with the given model."""
    if is_nn_recurrent(network):
        # process whole utterances time step by time step; sort them by length
        # (longest first) so the effective batch can shrink as utterances end
        utt_len = offsets[1:] - offsets[:-1]
        utt_idx = np.flip(utt_len.argsort(), axis=0)
        utt_idx_rev = np.zeros(len(utt_len), dtype=int)
        utt_idx_rev[utt_idx] = range(len(utt_len))
        xb = np.zeros(
            (len(utt_len), utt_len[utt_idx[0]] + timedelay, x.shape[1]),
            dtype=np.float32)
        for i, idx in enumerate(utt_idx):
            offset_beg = offsets[idx]
            offset_end = offsets[idx + 1]
            x_ = x[offset_beg:offset_end, :]
            # pad the end of each utterance so the delayed outputs can be read
            x_ = np.pad(x_, ((0, timedelay), (0, 0)), mode="edge")
            if ft is not None:
                xb[i, :x_.shape[0], :] = applyKaldiFeatureTransform(x_, ft)
            else:
                xb[i, :x_.shape[0], :] = x_
        yb = None
        model.reset_state()
        if progress:
            bar = progressbar.ProgressBar(max_value=xb.shape[1])
        for t in range(xb.shape[1]):
            batch_size = np.sum(utt_len > t)
            xg = xb[:, t, :]
            if gpu >= 0:
                xg = chainer.cuda.to_gpu(xg, device=gpu)
            with chainer.no_backprop_mode():
                y = model(chainer.Variable(xg)).data
            y = chainer.cuda.to_cpu(y)
            y = y - logsum(y, axis=1)  # normalize to log-posteriors
            if yb is None:
                yb = np.zeros(
                    (xb.shape[0], xb.shape[1] - timedelay, y.shape[1]),
                    dtype=np.float32)
            if t >= timedelay:
                yb[:batch_size, t - timedelay, :] = y[:batch_size]
            if progress:
                bar += 1
        # restore the original utterance order and drop the padding
        y_out = []
        for i, idx in enumerate(utt_idx_rev):
            y_out.append(yb[idx, :utt_len[i], :].reshape((utt_len[i], -1)))
        y_out = np.concatenate(y_out, axis=0)
    else:
        # feed-forward networks are evaluated on fixed-size batches of frames
        y_out = []
        batch_size = 1024
        offset = 0
        if progress:
            bar = progressbar.ProgressBar(max_value=x.shape[0])
        while offset < x.shape[0]:
            offset_end = min(offset + batch_size, x.shape[0])
            x_, _ = prepareBatch(x, [], np.arange(offset, offset_end), winlen)
            if ft is not None:
                x_ = applyKaldiFeatureTransform(x_, ft)
            if gpu < 0:
                xg = x_
            else:
                xg = chainer.cuda.to_gpu(x_, device=gpu)
            with chainer.no_backprop_mode():
                y = model(xg).data
            y = chainer.cuda.to_cpu(y)
            y = y - logsum(y, axis=1)  # normalize to log-posteriors
            y_out.append(y)
            offset += batch_size
            if progress:
                bar.update(offset_end)
        y_out = np.concatenate(y_out, axis=0)
    return y_out
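
# Illustrative usage sketch (not part of the original code): how predict()
# might be called for a plain feed-forward network. The file name below is
# hypothetical and "model" is assumed to be an already loaded network.
def _example_predict_ff(model):  # pragma: no cover - illustration only
    x = np.load("data_test.npy")  # hypothetical (frames, dim) float32 array
    # offsets are only needed by recurrent networks, so None is fine for "ff"
    log_post = predict(model, x, offsets=None, num_classes=39, network="ff",
                       gpu=-1, winlen=1, timedelay=0, ft=None)
    return log_post  # per-frame log-posteriors, one row per input frame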
def main(arg_list=None):
    """Train a frame-classification network (a single model, one fold network,
    or the RPL layer on top of fold outputs)."""
    parser = argparse.ArgumentParser(description='Chainer LSTM')
    parser.add_argument('--epoch', '-e', type=int, nargs='+', default=[20],
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--optimizer', '-o', nargs='+',
                        default=['momentumsgd'],
                        help='Optimizer (sgd, momentumsgd, adam)')
    parser.add_argument('--batchsize', '-b', type=int, nargs='+',
                        default=[128],
                        help='Number of training points in each mini-batch')
    parser.add_argument('--lr', type=float, nargs='+',
                        default=[1e-2, 1e-3, 1e-4, 1e-5], help='Learning rate')
    parser.add_argument(
        '--network', '-n', default='ff',
        help='Neural network type, either "ff", "tdnn", "lstm", "zoneoutlstm", '
             '"peepholelstm" or "gru". Setting any recurrent network implies '
             '"--shuffle-sequences"')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--units', '-u', type=int, nargs='+', default=[1024],
                        help='Number of units')
    parser.add_argument('--layers', '-l', type=int, default=2,
                        help='Number of hidden layers')
    parser.add_argument('--activation', '-a', default='relu',
                        help='FF activation function (sigmoid, tanh or relu)')
    parser.add_argument('--tdnn-ksize', type=int, nargs='+', default=[5],
                        help='TDNN kernel size')
    parser.add_argument('--bproplen', type=int, default=20,
                        help='Backpropagation length')
    parser.add_argument('--timedelay', type=int, default=0,
                        help='Delay target values by this many time steps')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    parser.add_argument('--splice', type=int, default=0, help='Splicing size')
    parser.add_argument(
        '--dropout', '-d', type=float, nargs='+', default=[0],
        help='Dropout rate (0 to disable). In case of Zoneout LSTM, this '
             'parameter has 2 arguments: c_ratio h_ratio')
    parser.add_argument('--ft', default='final.feature_transform',
                        help='Kaldi feature transform file')
    parser.add_argument('--tri', action='store_true', help='Use triphones')
    parser.add_argument(
        '--shuffle-sequences', action='store_true',
        help='True if sequences should be shuffled as a whole, otherwise all '
             'frames will be shuffled independently of each other')
    parser.add_argument(
        '--data-dir', default='data/fmllr',
        help='Data directory, this will be prepended to data files and '
             'feature transform')
    parser.add_argument(
        '--offset-dir', default='data',
        help='Data directory, this will be prepended to offset files')
    parser.add_argument(
        '--target-dir', default='data/targets',
        help='Data directory, this will be prepended to target files')
    parser.add_argument(
        '--ivector-dir',
        help='Data directory, this will be prepended to ivector files')
    parser.add_argument('--data', default='data_{}.npy', help='Training data')
    parser.add_argument('--offsets', default='offsets_{}.npy',
                        help='Training offsets')
    parser.add_argument('--targets', default='targets_{}.npy',
                        help='Training targets')
    parser.add_argument('--ivectors', default='ivectors_{}.npy',
                        help='Training ivectors')
    parser.add_argument('--no-validation', dest='use_validation',
                        action='store_false',
                        help='Do not evaluate validation data while training')
    parser.add_argument('--train-fold', type=int,
                        help='Train fold network with this ID')
    parser.add_argument('--train-rpl', action='store_true',
                        help='Train RPL layer')
    parser.add_argument('--rpl-model', default='result_rpl/model',
                        help='RPL layer model')
    parser.add_argument('--fold-data-dir', default='fold_data',
                        help='Directory with fold input data')
    parser.add_argument('--fold-output-dir', default='fold_data_out',
                        help='Directory with predicted fold output')
    parser.add_argument('--fold-model-dir', default='fold_models',
                        help='Directory with output fold model')
    parser.add_argument(
        '--fold-data-pattern', default='data_{0}.npy',
        help='Filename pattern of each fold data, {0} will be replaced by '
             'fold ID')
    parser.add_argument('--fold-offset-pattern', default='offsets_{0}.npy',
                        help='Filename pattern of each fold offset')
    parser.add_argument('--fold-target-pattern', default='targets_{0}.npy',
                        help='Filename pattern of each fold targets')
    parser.add_argument(
        '--fold-ivector-pattern', default='ivectors_{}.npy',
        help='Filename pattern of each fold i-vectors file, {} will be '
             'replaced by fold ID')
    parser.add_argument('--fold-output-pattern', default='data_{0}.npy',
                        help='Filename pattern of each fold network output')
    parser.add_argument('--fold-network-pattern', default='fold_{0}.npz',
                        help='Filename pattern of each fold network')
    parser.add_argument('--no-progress', action='store_true',
                        help='Disable progress bar')

    if arg_list is not None:
        args = parser.parse_args(list(map(str, arg_list)))
    else:
        args = parser.parse_args()

    # set options implied by other options
    if is_nn_recurrent(args.network):
        args.shuffle_sequences = True

    # create output directories
    Path(args.out).mkdir(exist_ok=True, parents=True)
    if args.train_fold is not None:
        file_out = Path(args.fold_model_dir,
                        args.fold_network_pattern.format(args.train_fold))
        Path(file_out.parent).mkdir(exist_ok=True, parents=True)

    # print arguments to a file
    with open(args.out + "/args.txt", "w") as f:
        for attr in dir(args):
            if not attr.startswith('_'):
                f.write('# {}: {}\n'.format(attr, getattr(args, attr)))
        f.write(' '.join(
            map(lambda x: "'" + x + "'" if ' ' in x else x, sys.argv)) + '\n')

    # print arguments to stdout
    for attr in dir(args):
        if not attr.startswith('_'):
            print('# {}: {}'.format(attr, getattr(args, attr)))
    print('')

    # number of output classes (triphone or monophone targets)
    num_classes = 1909 if args.tri else 39

    # create the model
    if args.train_rpl:
        model = RPL4(num_classes)
        model_cls = L.Classifier(model)
    else:
        if args.activation == "sigmoid":
            activation = F.sigmoid
        elif args.activation == "tanh":
            activation = F.tanh
        elif args.activation == "relu":
            activation = F.relu
        else:
            print("Wrong activation function specified")
            return
        model = get_nn(args.network, args.layers, args.units, num_classes,
                       activation, args.tdnn_ksize, args.dropout)
        # the classifier reports softmax cross entropy loss and accuracy at
        # every iteration, which is used by the PrintReport extension below
        model_cls = L.Classifier(model)

    if args.gpu >= 0:
        # make the specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model_cls.to_gpu()  # copy the model to the GPU

    offsets = offsets_dev = None
    if args.train_rpl:
        # load training data: predicted fold outputs are the inputs,
        # the original fold targets are the labels
        fold = 0
        x = []
        y = []
        while True:
            x_file = Path(args.fold_output_dir,
                          args.fold_output_pattern.format(fold))
            y_file = Path(args.fold_data_dir,
                          args.fold_target_pattern.format(fold))
            if not x_file.is_file() or not y_file.is_file():
                break
            print("Loading fold {} data".format(fold))
            x_ = np.load(str(x_file))
            y_ = np.load(str(y_file))
            x.append(x_)
            y.append(y_)
            fold += 1
        if fold == 0:
            print("Error: No fold data found")
            return
        x = np.concatenate(x, axis=0)
        y = np.concatenate(y, axis=0)
        if args.use_validation:
            # TODO: use args.data instead of args.dev_data
            x_dev = np.load(str(Path(args.data_dir, args.data.format("dev"))))
            # offsets_dev = loadBin(str(Path(args.datadir, args.dev_offsets)), np.int32)
            y_dev = np.load(
                str(Path(args.target_dir, args.targets.format("dev"))))
    else:
        # load training data
        ivectors = None
        ivectors_dev = None
        if args.train_fold is not None:
            # train on all folds except the held-out one
            x = []
            offsets = [0]
            y = []
            ivectors = []
            num = 0
            fold = 0
            while True:
                if fold != args.train_fold:
                    x_file = Path(args.fold_data_dir,
                                  args.fold_data_pattern.format(fold))
                    if not x_file.is_file():
                        break
                    offsets_file = Path(args.fold_data_dir,
                                        args.fold_offset_pattern.format(fold))
                    y_file = Path(args.fold_data_dir,
                                  args.fold_target_pattern.format(fold))
                    if args.ivector_dir is not None:
                        ivectors_file = Path(
                            args.fold_data_dir,
                            args.fold_ivector_pattern.format(fold))
                        if not ivectors_file.is_file():
                            print("Error: missing ivectors for fold data {}"
                                  .format(fold))
                            return
                    print("Loading fold {} data".format(fold))
                    x_fold = np.load(str(x_file))
                    x.append(x_fold)
                    if is_nn_recurrent(args.network):
                        offsets_fold = np.load(str(offsets_file))
                        offsets.extend(offsets_fold[1:] + num)
                    y_fold = np.load(str(y_file))
                    y.append(y_fold)
                    if args.ivector_dir is not None:
                        ivectors_fold = np.load(str(ivectors_file))
                        ivectors.append(ivectors_fold)
                    num += x_fold.shape[0]
                fold += 1
            if len(x) == 0:
                print("Error: No fold data found")
                return
            x = np.concatenate(x, axis=0)
            if is_nn_recurrent(args.network):
                offsets = np.array(offsets, dtype=np.int32)
            y = np.concatenate(y, axis=0)
            if args.ivector_dir is not None:
                ivectors = np.concatenate(ivectors, axis=0)
            else:
                ivectors = None
        else:
            x = np.load(str(Path(args.data_dir, args.data.format("train"))))
            if is_nn_recurrent(args.network):
                offsets = np.load(
                    str(Path(args.offset_dir, args.offsets.format("train"))))
            y = np.load(
                str(Path(args.target_dir, args.targets.format("train"))))
            if args.ivector_dir is not None:
                ivectors = np.load(
                    str(Path(args.ivector_dir,
                             args.ivectors.format("train"))))
        if args.use_validation:
            x_dev = np.load(str(Path(args.data_dir, args.data.format("dev"))))
            if is_nn_recurrent(args.network):
                offsets_dev = np.load(
                    str(Path(args.offset_dir, args.offsets.format("dev"))))
            y_dev = np.load(
                str(Path(args.target_dir, args.targets.format("dev"))))
            if args.ivector_dir is not None:
                ivectors_dev = np.load(
                    str(Path(args.ivector_dir, args.ivectors.format("dev"))))

        # apply splicing
        if args.network == "tdnn":
            splice = (sum(args.tdnn_ksize) - len(args.tdnn_ksize)) // 2
        else:
            splice = args.splice
        if splice > 0:
            x = splicing(x, range(-splice, splice + 1))
            if args.use_validation:
                x_dev = splicing(x_dev, range(-splice, splice + 1))

        # load feature transform
        if args.ft and args.ft != '-':
            ft = loadKaldiFeatureTransform(str(Path(args.data_dir, args.ft)))
            if is_nn_recurrent(args.network):
                # select the middle frame of the transform if the network is recurrent
                dim = ft["shape"][1]
                zi = ft["shifts"].index(0)
                ft["rescale"] = ft["rescale"][zi * dim:(zi + 1) * dim]
                ft["addShift"] = ft["addShift"][zi * dim:(zi + 1) * dim]
                ft["shape"][0] = dim
                ft["shifts"] = [0]
            elif args.network == "tdnn":
                # tile the middle frame of the transform over the splicing window
                dim = ft["shape"][1]
                zi = ft["shifts"].index(0)
                winlen = 2 * splice + 1
                ft["rescale"] = np.tile(
                    ft["rescale"][zi * dim:(zi + 1) * dim], winlen)
                ft["addShift"] = np.tile(
                    ft["addShift"][zi * dim:(zi + 1) * dim], winlen)
                ft["shape"][0] = dim * winlen
                ft["shifts"] = list(range(-splice, splice + 1))
            # apply feature transform
            x = applyKaldiFeatureTransform(x, ft)
            if args.use_validation:
                x_dev = applyKaldiFeatureTransform(x_dev, ft)

        if ivectors is not None:
            x = np.concatenate((x, ivectors), axis=1)
        if ivectors_dev is not None:
            x_dev = np.concatenate((x_dev, ivectors_dev), axis=1)

        # shift the input dataset according to the time delay
        if is_nn_recurrent(args.network) and args.timedelay != 0:
            x, y, offsets = apply_time_delay(x, y, offsets, args.timedelay)
            if args.use_validation:
                x_dev, y_dev, offsets_dev = apply_time_delay(
                    x_dev, y_dev, offsets_dev, args.timedelay)

    # create chainer datasets
    train_dataset = chainer.datasets.TupleDataset(x, y)
    if args.use_validation:
        dev_dataset = chainer.datasets.TupleDataset(x_dev, y_dev)

    # prepare train stages
    train_stages_len = max(len(args.batchsize), len(args.lr))
    train_stages = [{
        'epoch': index_padded(args.epoch, i),
        'opt': index_padded(args.optimizer, i),
        'bs': index_padded(args.batchsize, i),
        'lr': index_padded(args.lr, i)
    } for i in range(train_stages_len)]

    for i, ts in enumerate(train_stages):
        if ts['opt'] == 'adam':
            # the learning rate is not used by Adam, don't print it
            print("=== Training stage {}: epoch = {}, batchsize = {}, "
                  "optimizer = {}".format(i, ts['epoch'], ts['bs'], ts['opt']))
        else:
            print("=== Training stage {}: epoch = {}, batchsize = {}, "
                  "optimizer = {}, learning rate = {}".format(
                      i, ts['epoch'], ts['bs'], ts['opt'], ts['lr']))

        # reset state to allow training with a different batch size in each stage
        if not args.train_rpl and is_nn_recurrent(args.network):
            model.reset_state()

        # set up an optimizer
        if ts['opt'] == "sgd":
            optimizer = chainer.optimizers.SGD(lr=ts['lr'])
        elif ts['opt'] == "momentumsgd":
            optimizer = chainer.optimizers.MomentumSGD(lr=ts['lr'])
        elif ts['opt'] == "adam":
            optimizer = chainer.optimizers.Adam()
        else:
            print("Wrong optimizer specified: {}".format(ts['opt']))
            exit(1)
        optimizer.setup(model_cls)

        if args.shuffle_sequences:
            train_iter = SequenceShuffleIterator(train_dataset, offsets,
                                                 ts['bs'])
            if args.use_validation:
                dev_iter = SequenceShuffleIterator(dev_dataset, None,
                                                   ts['bs'], repeat=False,
                                                   shuffle=False)
        else:
            train_iter = SerialIterator(train_dataset, ts['bs'])
            if args.use_validation:
                dev_iter = SerialIterator(dev_dataset, ts['bs'], repeat=False,
                                          shuffle=False)

        # set up a trainer
        if is_nn_recurrent(args.network):
            updater = BPTTUpdater(train_iter, optimizer, args.bproplen,
                                  device=args.gpu)
        else:
            updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
        if args.use_validation:
            stop_trigger = EarlyStoppingTrigger(ts['epoch'],
                                                key='validation/main/loss',
                                                eps=-0.001)
        else:
            stop_trigger = (ts['epoch'], 'epoch')
        trainer = training.Trainer(updater, stop_trigger,
                                   out="{}/{}".format(args.out, i))
        trainer.extend(model_saver)

        # evaluate the model with the development dataset for each epoch
        if args.use_validation:
            trainer.extend(
                extensions.Evaluator(dev_iter, model_cls, device=args.gpu))

        # dump a computational graph from the 'loss' variable at the first
        # iteration; "main" refers to the target link of the "main" optimizer
        trainer.extend(extensions.dump_graph('main/loss'))

        # take a snapshot at the specified epoch frequency
        frequency = ts['epoch'] if args.frequency == -1 else max(
            1, args.frequency)
        trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

        # write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # save two plot images to the result directory
        if args.plot and extensions.PlotReport.available():
            plot_vars_loss = ['main/loss']
            plot_vars_acc = ['main/accuracy']
            if args.use_validation:
                plot_vars_loss.append('validation/main/loss')
                plot_vars_acc.append('validation/main/accuracy')
            trainer.extend(
                extensions.PlotReport(plot_vars_loss, 'epoch',
                                      file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(plot_vars_acc, 'epoch',
                                      file_name='accuracy.png'))

        # print selected entries of the log to stdout
        # "main" again refers to the target link of the "main" optimizer and
        # "validation" to the default name of the Evaluator extension; entries
        # other than 'epoch' are reported by the Classifier link, called by
        # either the updater or the evaluator
        if args.use_validation:
            print_report_vars = [
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]
        else:
            print_report_vars = [
                'epoch', 'main/loss', 'main/accuracy', 'elapsed_time'
            ]
        trainer.extend(extensions.PrintReport(print_report_vars))

        # print a progress bar to stdout
        # trainer.extend(extensions.ProgressBar())

        if args.resume:
            # resume the training from a snapshot
            chainer.serializers.load_npz(args.resume, trainer)

        # run the training
        trainer.run()

        # load the last saved model if the max epoch was not reached (that
        # means the early stopping trigger ended the stage because the
        # validation loss increased)
        if updater.epoch_detail < ts['epoch']:
            chainer.serializers.load_npz(
                "{}/{}/model_tmp".format(args.out, i), model_cls)
            # remove the temporary model from this training stage
            os.remove("{}/{}/model_tmp".format(args.out, i))

    # save the final model
    chainer.serializers.save_npz("{}/model".format(args.out), model_cls)
    if args.train_fold is not None:
        chainer.serializers.save_npz(
            str(Path(args.fold_model_dir,
                     args.fold_network_pattern.format(args.train_fold))),
            model_cls)
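
# Illustrative usage sketch (not part of the original script): main() accepts
# an arg_list, so a cross-validation driver could run the training stages
# programmatically. The option values and output paths below are hypothetical.
def _example_train_fold_network():  # pragma: no cover - illustration only
    main(arg_list=[
        '--network', 'lstm',      # any recurrent net implies --shuffle-sequences
        '--train-fold', 0,        # hold out fold 0, train on the remaining folds
        '--epoch', 10,
        '--batchsize', 256,
        '--lr', 1e-3,
        '--out', 'result_fold0',  # hypothetical output directory
    ])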
def main(arg_list=None):
    """Predict fold outputs (and optionally the ensemble output on the
    development set) with the trained fold networks."""
    parser = argparse.ArgumentParser(description='Chainer LSTM')
    parser.add_argument(
        '--network', '-n', default='ff',
        help='Neural network type, either "ff", "lstm" or "tdnn". Setting '
             '"lstm" implies "--shuffle-sequences"')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--units', '-u', type=int, nargs='+', default=[1024],
                        help='Number of units')
    parser.add_argument('--layers', '-l', type=int, default=2,
                        help='Number of hidden layers')
    parser.add_argument('--activation', '-a', default='relu',
                        help='FF activation function (sigmoid, tanh or relu)')
    parser.add_argument('--tdnn-ksize', type=int, nargs='+', default=[5],
                        help='TDNN kernel size')
    parser.add_argument('--timedelay', type=int, default=0,
                        help='Delay target values by this many time steps')
    parser.add_argument('--splice', type=int, default=0, help='Splicing size')
    parser.add_argument(
        '--dropout', '-d', type=float, nargs='+', default=[0],
        help='Dropout rate (0 to disable). In case of Zoneout LSTM, this '
             'parameter has 2 arguments: c_ratio h_ratio')
    parser.add_argument('--ft', help='Kaldi feature transform file')
    parser.add_argument('--tri', action='store_true', help='Use triphones')
    parser.add_argument('--data-dir', default='data/fmllr')
    parser.add_argument('--offset-dir', default='data')
    parser.add_argument('--ivector-dir', help='Directory with i-vector files')
    parser.add_argument('--data', default='data_{}.npy', help='Input data')
    parser.add_argument('--offsets', default='offsets_{}.npy',
                        help='Input offsets')
    parser.add_argument('--ivectors', default='ivectors_{}.npy')
    parser.add_argument('--fold-data-dir',
                        help='Directory with fold input data')
    parser.add_argument('--fold-output-dir',
                        help='Directory with predicted fold output')
    parser.add_argument('--fold-model-dir',
                        help='Directory with output fold model')
    parser.add_argument(
        '--fold-output-dev',
        help='Output file with predicted development data values')
    parser.add_argument(
        '--fold-data-pattern', default='data_{}.npy',
        help='Filename pattern of each fold data, {} will be replaced by '
             'fold ID')
    parser.add_argument('--fold-offset-pattern', default='offsets_{}.npy',
                        help='Filename pattern of each fold offset')
    parser.add_argument(
        '--fold-ivector-pattern', default='ivectors_{}.npy',
        help='Filename pattern of each fold i-vectors file, {} will be '
             'replaced by fold ID')
    parser.add_argument('--fold-output-pattern', default='data_{}.npy',
                        help='Filename pattern of each fold network output')
    parser.add_argument('--fold-network-pattern', default='fold_{}.npz',
                        help='Filename pattern of each fold network')
    parser.add_argument('--no-progress', action='store_true',
                        help='Disable progress bar')

    if arg_list is not None:
        args = parser.parse_args(list(map(str, arg_list)))
    else:
        args = parser.parse_args()

    # create output directories
    if args.fold_output_dev is not None:
        out_file = Path(args.fold_output_dir, args.fold_output_dev)
    else:
        out_file = Path(args.fold_output_dir, args.fold_output_pattern)
    Path(out_file.parent).mkdir(exist_ok=True, parents=True)

    # number of output classes (triphone or monophone targets)
    num_classes = 1909 if args.tri else 39

    chainer.config.train = False

    # create the model
    if args.activation == "sigmoid":
        activation = F.sigmoid
    elif args.activation == "tanh":
        activation = F.tanh
    elif args.activation == "relu":
        activation = F.relu
    else:
        print("Wrong activation function specified")
        exit(1)
    model = get_nn(args.network, args.layers, args.units, num_classes,
                   activation, args.tdnn_ksize, args.dropout)
    # the classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which is used by the PrintReport extension below
    model_cls = L.Classifier(model)

    if args.gpu >= 0:
        # make the specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model_cls.to_gpu()  # copy the model to the GPU

    if args.network == "tdnn":
        splice = (sum(args.tdnn_ksize) - len(args.tdnn_ksize)) // 2
    else:
        splice = args.splice
    winlen = 2 * splice + 1

    # load feature transform
    if args.ft and args.ft != '-':
        ft = loadKaldiFeatureTransform(str(Path(args.data_dir, args.ft)))
        if is_nn_recurrent(args.network):
            # select the middle frame of the transform if the network is recurrent
            dim = ft["shape"][1]
            zi = ft["shifts"].index(0)
            ft["rescale"] = ft["rescale"][zi * dim:(zi + 1) * dim]
            ft["addShift"] = ft["addShift"][zi * dim:(zi + 1) * dim]
            ft["shape"][0] = dim
            ft["shifts"] = [0]
        elif args.network == "tdnn":
            # tile the middle frame of the transform over the splicing window
            dim = ft["shape"][1]
            zi = ft["shifts"].index(0)
            winlen = 2 * splice + 1
            ft["rescale"] = np.tile(ft["rescale"][zi * dim:(zi + 1) * dim],
                                    winlen)
            ft["addShift"] = np.tile(ft["addShift"][zi * dim:(zi + 1) * dim],
                                     winlen)
            ft["shape"][0] = dim * winlen
            ft["shifts"] = list(range(-splice, splice + 1))
    else:
        ft = None

    if args.fold_output_dev is not None:
        # predict the development data with the ensemble of all fold networks
        x = np.load(str(Path(args.data_dir, args.data.format("dev"))))
        if is_nn_recurrent(args.network):
            offsets = np.load(
                str(Path(args.offset_dir, args.offsets.format("dev"))))
        else:
            offsets = None
        if args.ivector_dir:
            ivectors = np.load(
                str(Path(args.ivector_dir, args.ivectors.format("dev"))))
            x = np.concatenate((x, ivectors), axis=1)
        y_out = 0
        fold = 0
        while True:
            model_file = Path(args.fold_model_dir,
                              args.fold_network_pattern.format(fold))
            if not model_file.is_file():
                break
            serializers.load_npz(str(model_file), model_cls)
            print("Predicting fold {} data".format(fold))
            y = predict(model, x, offsets, num_classes, args.network,
                        args.gpu, winlen, args.timedelay, ft,
                        not args.no_progress)
            y_out += y
            fold += 1
        if fold == 0:
            print("Error: No fold networks found")
            exit(2)
        # average the fold outputs and renormalize the log-posteriors
        y_out /= fold
        y_out = y_out - logsum(y_out, axis=1)
        np.save(str(Path(args.fold_output_dir, args.fold_output_dev)), y_out)
    else:
        # predict each fold's data with the corresponding fold network
        fold = 0
        while True:
            model_file = Path(args.fold_model_dir,
                              args.fold_network_pattern.format(fold))
            if not model_file.is_file():
                break
            serializers.load_npz(str(model_file), model_cls)
            print("Predicting fold {} data".format(fold))
            x = np.load(
                str(Path(args.fold_data_dir,
                         args.fold_data_pattern.format(fold))))
            if is_nn_recurrent(args.network):
                offsets = np.load(
                    str(Path(args.fold_data_dir,
                             args.fold_offset_pattern.format(fold))))
            else:
                offsets = None
            if args.ivector_dir:
                ivectors = np.load(
                    str(Path(args.fold_data_dir,
                             args.fold_ivector_pattern.format(fold))))
                x = np.concatenate((x, ivectors), axis=1)
            y = predict(model, x, offsets, num_classes, args.network,
                        args.gpu, winlen, args.timedelay, ft,
                        not args.no_progress)
            np.save(
                str(Path(args.fold_output_dir,
                         args.fold_output_pattern.format(fold))), y)
            fold += 1
        if fold == 0:
            print("Error: No fold networks found")
            exit(2)
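
# Illustrative usage sketch (not part of the original script): first predict
# each fold's data with the corresponding fold networks, then the development
# set with the whole ensemble. All directory and file names are hypothetical.
def _example_fold_prediction():  # pragma: no cover - illustration only
    common = ['--network', 'ff', '--gpu', -1,
              '--fold-data-dir', 'fold_data',
              '--fold-model-dir', 'fold_models',
              '--fold-output-dir', 'fold_data_out']
    main(arg_list=common)  # per-fold outputs, one file per fold
    main(arg_list=common + ['--fold-output-dev', 'data_dev.npy'])  # ensemble dev output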
def main(arg_list=None):
    """Evaluate a trained model (or a fold ensemble with the RPL layer) on the
    test or development set and report the phone error rate."""
    parser = argparse.ArgumentParser(description='Chainer Evaluation')
    parser.add_argument('--network', '-n', default='ff',
                        help='Neural network type, either "ff" or "lstm"')
    parser.add_argument('--model', '-m', default='', help='Path to the model')
    parser.add_argument('--units', '-u', type=int, nargs='+', default=[1024],
                        help='Number of units')
    parser.add_argument('--layers', '-l', type=int, default=2,
                        help='Number of hidden layers')
    parser.add_argument('--activation', '-a', default='relu',
                        help='FF activation function (sigmoid, tanh or relu)')
    parser.add_argument('--tdnn-ksize', type=int, nargs='+', default=[5],
                        help='TDNN kernel size')
    parser.add_argument('--timedelay', type=int, default=0,
                        help='Delay target values by this many time steps')
    parser.add_argument('--splice', type=int, default=0, help='Splicing size')
    parser.add_argument(
        '--dropout', '-d', type=float, nargs='+', default=[0],
        help='Dropout rate (0 to disable). In case of Zoneout LSTM, this '
             'parameter has 2 arguments: c_ratio h_ratio')
    parser.add_argument('--tri', action='store_true', help='Use triphones')
    parser.add_argument('--ft', default='final.feature_transform',
                        help='Kaldi feature transform file')
    parser.add_argument(
        '--data-dir', default='data/fmllr',
        help='Data directory, this will be prepended to data files and '
             'feature transform')
    parser.add_argument(
        '--offset-dir', default='data',
        help='Data directory, this will be prepended to offset files')
    parser.add_argument(
        '--ivector-dir',
        help='Data directory, this will be prepended to ivector files')
    parser.add_argument('--recog-dir', required=True,
                        help='Directory with recognizer files')
    parser.add_argument('--utt-list-dir', default='data',
                        help='Directory with utterance lists')
    parser.add_argument('--data', default='data_{}.npy', help='Data file')
    parser.add_argument('--offsets', default='offsets_{}.npy',
                        help='Offset file')
    parser.add_argument('--ivectors', default='ivectors_{}.npy',
                        help='ivectors file')
    parser.add_argument('--PIP', type=float, default=20)
    parser.add_argument('--LMW', type=float, default=1)
    parser.add_argument('--ap-coef', type=float, default=1)
    parser.add_argument('--ap-file', default='log_ap_Kaldi1909.npy',
                        help='Path relative to recogdir')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--test-or-dev', default='test', help='Test or dev')
    parser.add_argument('--rpl', action='store_true',
                        help='Use RPL layer with folds')
    parser.add_argument('--no-rpl-layer', action='store_true',
                        help='Disable RPL layer')
    parser.add_argument('--rpl-model', default='result_rpl/model',
                        help='RPL layer model')
    parser.add_argument('--fold-model-dir', default='fold_models',
                        help='Directory with trained fold models')
    parser.add_argument('--fold-network-pattern', default='fold_{0}.npz',
                        help='Filename pattern of each fold network')
    parser.add_argument('--master-network', default='-',
                        help='Master network')
    parser.add_argument('--no-progress', action='store_true',
                        help='Disable progress bar')

    if arg_list is not None:
        args = parser.parse_args(list(map(str, arg_list)))
    else:
        args = parser.parse_args()

    # number of output classes (triphone or monophone targets)
    num_classes = 1909 if args.tri else 39

    chainer.config.train = False

    if args.activation == "sigmoid":
        activation = F.sigmoid
    elif args.activation == "tanh":
        activation = F.tanh
    elif args.activation == "relu":
        activation = F.relu
    else:
        print("Wrong activation function specified")
        return

    if args.rpl:
        # build the combined model: optional master network, fold networks
        # and the RPL layer on top
        fold = 0
        fold_models = []
        if args.master_network != "-":
            print("Loading master network")
            master_model = get_nn(args.network, args.layers, args.units,
                                  num_classes, activation, args.tdnn_ksize,
                                  args.dropout)
            master_model_cls = L.Classifier(master_model)
            chainer.serializers.load_npz(args.master_network,
                                         master_model_cls)
        else:
            master_model = None
        if args.fold_network_pattern != "-":
            while True:
                model_file = Path(args.fold_model_dir,
                                  args.fold_network_pattern.format(fold))
                if not model_file.is_file():
                    break
                print("Loading fold {} network".format(fold))
                fold_model = get_nn(args.network, args.layers, args.units,
                                    num_classes, activation, args.tdnn_ksize,
                                    args.dropout)
                fold_model_cls = L.Classifier(fold_model)
                chainer.serializers.load_npz(str(model_file), fold_model_cls)
                fold_models.append(fold_model)
                fold += 1
        if args.rpl_model != "-":
            rpl_model = RPL4(num_classes)
            rpl_model_cls = L.Classifier(rpl_model)
            chainer.serializers.load_npz(args.rpl_model, rpl_model_cls)
        else:
            rpl_model = None
        model = NNWithRPL(master_model, fold_models, rpl_model)
    else:
        model = get_nn(args.network, args.layers, args.units, num_classes,
                       activation, args.tdnn_ksize, args.dropout)
        model_cls = L.Classifier(model)
        chainer.serializers.load_npz(args.model, model_cls)

    if args.network == "tdnn":
        splice = (sum(args.tdnn_ksize) - len(args.tdnn_ksize)) // 2
    else:
        splice = args.splice

    # load feature transform
    if args.ft and args.ft != '-':
        ft = loadKaldiFeatureTransform(str(Path(args.data_dir, args.ft)))
        if is_nn_recurrent(args.network):
            # select the middle frame of the transform if the network is recurrent
            dim = ft["shape"][1]
            zi = ft["shifts"].index(0)
            ft["rescale"] = ft["rescale"][zi * dim:(zi + 1) * dim]
            ft["addShift"] = ft["addShift"][zi * dim:(zi + 1) * dim]
            ft["shape"][0] = dim
            ft["shifts"] = [0]
        elif args.network == "tdnn":
            # tile the middle frame of the transform over the splicing window
            dim = ft["shape"][1]
            zi = ft["shifts"].index(0)
            winlen = 2 * splice + 1
            ft["rescale"] = np.tile(ft["rescale"][zi * dim:(zi + 1) * dim],
                                    winlen)
            ft["addShift"] = np.tile(ft["addShift"][zi * dim:(zi + 1) * dim],
                                     winlen)
            ft["shape"][0] = dim * winlen
            ft["shifts"] = list(range(-splice, splice + 1))
    else:
        ft = None

    # load and preprocess the evaluation data
    data = np.load(
        str(Path(args.data_dir, args.data.format(args.test_or_dev))))
    if splice > 0:
        data = splicing(data, range(-splice, splice + 1))
    if ft is not None:
        data = applyKaldiFeatureTransform(data, ft)
    offsets = np.load(
        str(Path(args.offset_dir, args.offsets.format(args.test_or_dev))))
    if args.ivector_dir is not None:
        ivectors = np.load(
            str(Path(args.ivector_dir,
                     args.ivectors.format(args.test_or_dev))))
        data = np.concatenate((data, ivectors), axis=1)

    if args.tri:
        ap = args.ap_coef * np.load(str(Path(args.recog_dir, args.ap_file)))
        per = evaluateModelTestTri(model, data, offsets, args.PIP, args.LMW,
                                   ap=ap, testOrDev=args.test_or_dev,
                                   uttlistdir=args.utt_list_dir,
                                   recogdir=args.recog_dir, GPUID=args.gpu,
                                   progress=not args.no_progress,
                                   rnn=is_nn_recurrent(args.network))
    else:
        print("Monophones not implemented")
        return

    print("PER: {0:.2f} %".format(per))
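
# Illustrative usage sketch (not part of the original script): computing the
# phone error rate of a single trained triphone model on the test set. The
# model and recognizer paths are hypothetical.
def _example_evaluate_test():  # pragma: no cover - illustration only
    main(arg_list=[
        '--network', 'ff',
        '--tri',                    # use the 1909 triphone targets
        '--model', 'result/model',  # model saved by the training script
        '--recog-dir', 'recog',     # hypothetical directory with recognizer files
        '--test-or-dev', 'test',
        '--gpu', -1,
    ])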