def main():
    """Train a character-level model for `args.epochs` epochs.

    Parses hyperparameters from the command line, dumps the run config to
    JSON, then runs the optimizer over the utterance character stream,
    checkpointing parameters every SAVE_PARAMS_EVERY iterations and at the
    end of each epoch (with a `params.pk` symlink to the latest epoch file).
    """
    # TODO Be able to pass in different models into training script as well?
    model_class, model_hps = get_model_class_and_params(MODEL_TYPE)
    opt_hps = OptimizerHyperparams()

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("epochs", type=int, help="number of epochs to train")
    parser.add_argument("--opt", default="nag", help="optimizer to use", choices=["cm", "nag"])
    parser.add_argument("--anneal_factor", type=float, default=2.0, help="annealing factor after each epoch")
    parser.add_argument("out_dir", help="output directory to write model files")
    parser.add_argument("--cfg_file", help="cfg file for restarting run")
    model_hps.add_to_argparser(parser)
    opt_hps.add_to_argparser(parser)
    args = parser.parse_args()
    model_hps.set_from_args(args)
    opt_hps.set_from_args(args)

    # Persist the full run configuration so the run can later be restarted
    # or sampled from with the same hyperparameters.
    cfg = args.__dict__.copy()
    if not cfg["cfg_file"]:
        cfg["cfg_file"] = pjoin(args.out_dir, "cfg.json")
    add_run_data(cfg)
    dump_config(cfg, cfg["cfg_file"])

    # Load dataset
    # dataset = CharStream(CONTEXT, args.batch_size, step=1)
    dataset = UttCharStream(args.batch_size)

    # Construct network
    model = model_class(dataset, model_hps, opt_hps, opt=args.opt)

    # Run training
    for k in xrange(0, args.epochs):
        it = 0
        while dataset.data_left():
            model.run()
            # NOTE: the original guard was `if it % 1 == 0`, which is always
            # true; we log every iteration unconditionally (same behavior).
            logger.info(
                "epoch %d, iter %d, obj=%f, exp_obj=%f, gnorm=%f"
                % (k, it, model.opt.costs[-1], model.opt.expcosts[-1], model.opt.grad_norm)
            )
            # gnp.memory_allocators()
            # print gnp.memory_in_use()
            it += 1
            if it % SAVE_PARAMS_EVERY == 0:
                params_file = pjoin(args.out_dir, "params_save_every.pk")
                with open(params_file, "wb") as fout:
                    model.to_file(fout)

        # Anneal
        model.opt.alpha /= args.anneal_factor

        # Save final parameters
        params_file = pjoin(args.out_dir, "params_epoch{0:02}.pk".format(k + 1))
        with open(params_file, "wb") as fout:
            model.to_file(fout)

        # Symlink param file to latest.
        # BUG FIX: os.path.exists() returns False for a dangling symlink, so a
        # stale broken link survived the check and os.symlink() then raised
        # EEXIST; os.path.lexists() detects broken links too.
        sym_file = pjoin(args.out_dir, "params.pk")
        if os.path.lexists(sym_file):
            os.remove(sym_file)
        os.symlink(params_file, sym_file)

        if k != args.epochs - 1:
            model.start_next_epoch()
# NOTE(review): fragment — the enclosing `def` of the next four lines (a
# character-sampling helper) begins outside this chunk; indentation was
# reconstructed from the collapsed source. The visible tail normalizes
# `probs` into a distribution, draws one output index, and maps it back
# to a character via the inverted vocabulary.
probs = probs / sum(probs)
w = np.random.choice(range(model.hps.output_size), p=probs)
char = chars[w]
return char


if __name__ == '__main__':
    # Sampling driver: rebuild model/optimizer hyperparameters from a saved
    # run config, then load the character vocabulary used during training.
    parser = argparse.ArgumentParser()
    parser.add_argument('cfg_file', help='config file with run data for model to use')
    args = parser.parse_args()

    cfg = load_config(args.cfg_file)
    model_class, model_hps = get_model_class_and_params(MODEL_TYPE)
    opt_hps = OptimizerHyperparams()
    model_hps.set_from_dict(cfg)
    opt_hps.set_from_dict(cfg)
    cfg = CfgStruct(**cfg)

    SAMPLES = 100
    SAMPLE_LENGTH = 100  # PARAM
    ALPHA = 1.0  # FIXME PARAM
    LM_ORDER = CONTEXT + 1

    with open(CHAR_CORPUS_VOCAB_FILE, 'rb') as fin:
        char_inds = pickle.load(fin)
    # Invert char -> index into index -> char so sampled indices can be
    # decoded back into text.
    chars = dict((v, k) for k, v in char_inds.iteritems())
# NOTE(review): fragment — the enclosing method (the tail of an RNN backward
# pass, presumably cost_and_grad) begins outside this chunk; indentation was
# reconstructed from the collapsed source. These lines push the hidden-layer
# delta one step back in time and accumulate batch-averaged gradients for the
# recurrent (Whh/bhh) and input (Wih/bih) parameters.
dhs[k][t-1] = mult(Whh.T, dus[k][t])
self.grads['Whh'] += mult(dus[k][t], hprev.T) / bsize
self.grads['bhh'] += dus[k][t].sum(axis=-1).reshape((-1, 1)) / bsize
self.grads['Wih'] += mult(dus[0][t], data[:, t, :].T) / bsize
self.grads['bih'] += dus[0][t].sum(axis=-1).reshape((-1, 1)) / bsize
return cost, self.grads


if __name__ == '__main__':
    # Gradient-check driver: build a tiny RNN (hidden_size=10) over the
    # utterance character stream and run a single check_grad pass.
    import argparse
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    model_hps = RNNHyperparams()
    model_hps.hidden_size = 10
    opt_hps = OptimizerHyperparams()
    model_hps.add_to_argparser(parser)
    opt_hps.add_to_argparser(parser)
    args = parser.parse_args()
    model_hps.set_from_args(args)
    opt_hps.set_from_args(args)

    dset = UttCharStream(args.batch_size)

    # Construct network
    model = RNN(dset, model_hps, opt_hps, opt='nag')
    model.run(check_grad=True)
# NOTE(review): fragment (duplicate of the previous chunk, wrapped
# differently) — this chunk begins mid-statement: the leading `(-1, 1)) /
# bsize` is the tail of a `.reshape(` call whose opening lies on an unseen
# preceding line. Tokens preserved as-is; indentation reconstructed.
(-1, 1)) / bsize
self.grads['Wih'] += mult(dus[0][t], data[:, t, :].T) / bsize
self.grads['bih'] += dus[0][t].sum(axis=-1).reshape(
    (-1, 1)) / bsize
return cost, self.grads


if __name__ == '__main__':
    # Gradient-check driver: tiny RNN (hidden_size=10), single check_grad run.
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    model_hps = RNNHyperparams()
    model_hps.hidden_size = 10
    opt_hps = OptimizerHyperparams()
    model_hps.add_to_argparser(parser)
    opt_hps.add_to_argparser(parser)
    args = parser.parse_args()
    model_hps.set_from_args(args)
    opt_hps.set_from_args(args)

    dset = UttCharStream(args.batch_size)

    # Construct network
    model = RNN(dset, model_hps, opt_hps, opt='nag')
    model.run(check_grad=True)
def runSeq(opts): fid = open(opts.out_file, 'w') phone_map = get_char_map(opts.dataDir) print phone_map print len(phone_map) alisDir = opts.alisDir if opts.alisDir else opts.dataDir loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim, alisDir) hyps = list() refs = list() hypscores = list() refscores = list() numphones = list() subsets = list() alignments = list() if MODEL_TYPE != 'ngram': cfg_file = '/deep/u/zxie/rnnlm/13/cfg.json' params_file = '/deep/u/zxie/rnnlm/13/params.pk' #cfg_file = '/deep/u/zxie/dnn/11/cfg.json' #params_file = '/deep/u/zxie/dnn/11/params.pk' cfg = load_config(cfg_file) model_class, model_hps = get_model_class_and_params(MODEL_TYPE) opt_hps = OptimizerHyperparams() model_hps.set_from_dict(cfg) opt_hps.set_from_dict(cfg) clm = model_class(None, model_hps, opt_hps, train=False, opt='nag') with open(params_file, 'rb') as fin: clm.from_file(fin) else: from srilm import LM from decoder_config import LM_ARPA_FILE print 'Loading %s...' % LM_ARPA_FILE clm = LM(LM_ARPA_FILE) print 'Done.' 
#clm = None for i in range(opts.start_file, opts.start_file + opts.numFiles): data_dict, alis, keys, _ = loader.loadDataFileDict(i) # For later alignments keys = sorted(keys) # For Switchboard filter if DATA_SUBSET == 'eval2000': if SWBD_SUBSET == 'swbd': keys = [k for k in keys if k.startswith('sw')] elif SWBD_SUBSET == 'callhome': keys = [k for k in keys if k.startswith('en')] ll_file = pjoin(LIKELIHOODS_DIR, 'loglikelihoods_%d.pk' % i) ll_fid = open(ll_file, 'rb') probs_dict = pickle.load(ll_fid) # Parallelize decoding over utterances print 'Decoding utterances in parallel, n_jobs=%d, file=%d' % ( NUM_CPUS, i) decoded_utts = Parallel(n_jobs=NUM_CPUS)(delayed(decode_utterance)( k, probs_dict[k], alis[k], phone_map, lm=clm) for k in keys) for k, (hyp, ref, hypscore, refscore, align) in zip(keys, decoded_utts): if refscore is None: refscore = 0.0 if hypscore is None: hypscore = 0.0 hyp = replace_contractions(hyp) fid.write(k + ' ' + ' '.join(hyp) + '\n') hyps.append(hyp) refs.append(ref) hypscores.append(hypscore) refscores.append(refscore) numphones.append(len(alis[k])) subsets.append('callhm' if k.startswith('en') else 'swbd') alignments.append(align) fid.close() # Pickle some values for computeStats.py pkid = open(opts.out_file.replace('.txt', '.pk'), 'wb') pickle.dump(hyps, pkid) pickle.dump(refs, pkid) pickle.dump(hypscores, pkid) pickle.dump(refscores, pkid) pickle.dump(numphones, pkid) pickle.dump(subsets, pkid) pickle.dump(alignments, pkid) pkid.close()
def main():
    """Train a character-level model for `args.epochs` epochs.

    Parses hyperparameters from the command line, dumps the run config to
    JSON, then runs the optimizer over the utterance character stream,
    checkpointing every SAVE_PARAMS_EVERY iterations and at each epoch end
    (with a `params.pk` symlink to the latest epoch file).
    """
    # TODO Be able to pass in different models into training script as well?
    model_class, model_hps = get_model_class_and_params(MODEL_TYPE)
    opt_hps = OptimizerHyperparams()

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('epochs', type=int, help='number of epochs to train')
    parser.add_argument('--opt', default='nag', help='optimizer to use', choices=['cm', 'nag'])
    parser.add_argument('--anneal_factor', type=float, default=2.0, help='annealing factor after each epoch')
    parser.add_argument('out_dir', help='output directory to write model files')
    parser.add_argument('--cfg_file', help='cfg file for restarting run')
    model_hps.add_to_argparser(parser)
    opt_hps.add_to_argparser(parser)
    args = parser.parse_args()
    model_hps.set_from_args(args)
    opt_hps.set_from_args(args)

    # Persist the full run configuration so the run can later be restarted
    # or sampled from with the same hyperparameters.
    cfg = args.__dict__.copy()
    if not cfg['cfg_file']:
        cfg['cfg_file'] = pjoin(args.out_dir, 'cfg.json')
    add_run_data(cfg)
    dump_config(cfg, cfg['cfg_file'])

    # Load dataset
    #dataset = CharStream(CONTEXT, args.batch_size, step=1)
    dataset = UttCharStream(args.batch_size)

    # Construct network
    model = model_class(dataset, model_hps, opt_hps, opt=args.opt)

    # Run training
    for k in xrange(0, args.epochs):
        it = 0
        while dataset.data_left():
            model.run()
            # NOTE: the original guard was `if it % 1 == 0`, which is always
            # true; we log every iteration unconditionally (same behavior).
            logger.info('epoch %d, iter %d, obj=%f, exp_obj=%f, gnorm=%f' %
                        (k, it, model.opt.costs[-1], model.opt.expcosts[-1], model.opt.grad_norm))
            #gnp.memory_allocators()
            #print gnp.memory_in_use()
            it += 1
            if it % SAVE_PARAMS_EVERY == 0:
                params_file = pjoin(args.out_dir, 'params_save_every.pk')
                with open(params_file, 'wb') as fout:
                    model.to_file(fout)

        # Anneal
        model.opt.alpha /= args.anneal_factor

        # Save final parameters
        params_file = pjoin(args.out_dir, 'params_epoch{0:02}.pk'.format(k+1))
        with open(params_file, 'wb') as fout:
            model.to_file(fout)

        # Symlink param file to latest.
        # BUG FIX: os.path.exists() returns False for a dangling symlink, so a
        # stale broken link survived the check and os.symlink() then raised
        # EEXIST; os.path.lexists() detects broken links too.
        sym_file = pjoin(args.out_dir, 'params.pk')
        if os.path.lexists(sym_file):
            os.remove(sym_file)
        os.symlink(params_file, sym_file)

        if k != args.epochs - 1:
            model.start_next_epoch()
def runSeq(opts): fid = open(opts.out_file, 'w') phone_map = get_char_map(opts.dataDir) print phone_map print len(phone_map) alisDir = opts.alisDir if opts.alisDir else opts.dataDir loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim, alisDir) hyps = list() refs = list() hypscores = list() refscores = list() numphones = list() subsets = list() alignments = list() if MODEL_TYPE != 'ngram': cfg_file = '/deep/u/zxie/rnnlm/13/cfg.json' params_file = '/deep/u/zxie/rnnlm/13/params.pk' #cfg_file = '/deep/u/zxie/dnn/11/cfg.json' #params_file = '/deep/u/zxie/dnn/11/params.pk' cfg = load_config(cfg_file) model_class, model_hps = get_model_class_and_params(MODEL_TYPE) opt_hps = OptimizerHyperparams() model_hps.set_from_dict(cfg) opt_hps.set_from_dict(cfg) clm = model_class(None, model_hps, opt_hps, train=False, opt='nag') with open(params_file, 'rb') as fin: clm.from_file(fin) else: from srilm import LM from decoder_config import LM_ARPA_FILE print 'Loading %s...' % LM_ARPA_FILE clm = LM(LM_ARPA_FILE) print 'Done.' 
#clm = None for i in range(opts.start_file, opts.start_file + opts.numFiles): data_dict, alis, keys, _ = loader.loadDataFileDict(i) # For later alignments keys = sorted(keys) # For Switchboard filter if DATA_SUBSET == 'eval2000': if SWBD_SUBSET == 'swbd': keys = [k for k in keys if k.startswith('sw')] elif SWBD_SUBSET == 'callhome': keys = [k for k in keys if k.startswith('en')] ll_file = pjoin(LIKELIHOODS_DIR, 'loglikelihoods_%d.pk' % i) ll_fid = open(ll_file, 'rb') probs_dict = pickle.load(ll_fid) # Parallelize decoding over utterances print 'Decoding utterances in parallel, n_jobs=%d, file=%d' % (NUM_CPUS, i) decoded_utts = Parallel(n_jobs=NUM_CPUS)(delayed(decode_utterance)(k, probs_dict[k], alis[k], phone_map, lm=clm) for k in keys) for k, (hyp, ref, hypscore, refscore, align) in zip(keys, decoded_utts): if refscore is None: refscore = 0.0 if hypscore is None: hypscore = 0.0 hyp = replace_contractions(hyp) fid.write(k + ' ' + ' '.join(hyp) + '\n') hyps.append(hyp) refs.append(ref) hypscores.append(hypscore) refscores.append(refscore) numphones.append(len(alis[k])) subsets.append('callhm' if k.startswith('en') else 'swbd') alignments.append(align) fid.close() # Pickle some values for computeStats.py pkid = open(opts.out_file.replace('.txt', '.pk'), 'wb') pickle.dump(hyps, pkid) pickle.dump(refs, pkid) pickle.dump(hypscores, pkid) pickle.dump(refscores, pkid) pickle.dump(numphones, pkid) pickle.dump(subsets, pkid) pickle.dump(alignments, pkid) pkid.close()