Example #1
def main():
    # TODO Allow passing different models to the training script as well?

    model_class, model_hps = get_model_class_and_params(MODEL_TYPE)
    opt_hps = OptimizerHyperparams()

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("epochs", type=int, help="number of epochs to train")
    parser.add_argument("--opt", default="nag", help="optimizer to use", choices=["cm", "nag"])
    parser.add_argument("--anneal_factor", type=float, default=2.0, help="annealing factor after each epoch")
    parser.add_argument("out_dir", help="output directory to write model files")
    parser.add_argument("--cfg_file", help="cfg file for restarting run")
    model_hps.add_to_argparser(parser)
    opt_hps.add_to_argparser(parser)
    args = parser.parse_args()

    model_hps.set_from_args(args)
    opt_hps.set_from_args(args)
    cfg = args.__dict__.copy()
    if not cfg["cfg_file"]:
        cfg["cfg_file"] = pjoin(args.out_dir, "cfg.json")
    add_run_data(cfg)
    dump_config(cfg, cfg["cfg_file"])

    # Load dataset
    # dataset = CharStream(CONTEXT, args.batch_size, step=1)
    dataset = UttCharStream(args.batch_size)

    # Construct network
    model = model_class(dataset, model_hps, opt_hps, opt=args.opt)

    # Run training
    for k in xrange(0, args.epochs):
        it = 0
        while dataset.data_left():
            model.run()

            if it % 1 == 0:
                logger.info(
                    "epoch %d, iter %d, obj=%f, exp_obj=%f, gnorm=%f"
                    % (k, it, model.opt.costs[-1], model.opt.expcosts[-1], model.opt.grad_norm)
                )
                # gnp.memory_allocators()
                # print gnp.memory_in_use()
            it += 1
            if it % SAVE_PARAMS_EVERY == 0:
                params_file = pjoin(args.out_dir, "params_save_every.pk")
                with open(params_file, "wb") as fout:
                    model.to_file(fout)

        # Anneal
        model.opt.alpha /= args.anneal_factor

        # Save final parameters
        params_file = pjoin(args.out_dir, "params_epoch{0:02}.pk".format(k + 1))
        with open(params_file, "wb") as fout:
            model.to_file(fout)

        # Symlink param file to latest
        sym_file = pjoin(args.out_dir, "params.pk")
        if os.path.exists(sym_file):
            os.remove(sym_file)
        os.symlink(params_file, sym_file)

        if k != args.epochs - 1:
            model.start_next_epoch()
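
The epoch loop above saves a numbered checkpoint and then repoints a params.pk symlink at it so downstream tools can always load the latest parameters. A minimal standalone sketch of that "latest checkpoint" pattern, using only the standard library (the function and path names here are illustrative, not the training script's API):

import os
from os.path import join as pjoin

def update_latest_symlink(out_dir, params_file):
    # Repoint out_dir/params.pk at the newest checkpoint file.
    sym_file = pjoin(out_dir, "params.pk")
    # os.path.exists() returns False for a dangling symlink, so also check
    # islink() before removing; otherwise os.symlink() can fail when a stale
    # link is left behind.
    if os.path.islink(sym_file) or os.path.exists(sym_file):
        os.remove(sym_file)
    os.symlink(params_file, sym_file)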
Example #2
    probs = probs / sum(probs)

    w = np.random.choice(range(model.hps.output_size), p=probs)
    char = chars[w]

    return char


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('cfg_file', help='config file with run data for model to use')
    args = parser.parse_args()

    cfg = load_config(args.cfg_file)
    model_class, model_hps = get_model_class_and_params(MODEL_TYPE)
    opt_hps = OptimizerHyperparams()
    model_hps.set_from_dict(cfg)
    opt_hps.set_from_dict(cfg)
    cfg = CfgStruct(**cfg)

    SAMPLES = 100
    SAMPLE_LENGTH = 100
    # PARAM
    ALPHA = 1.0
    # FIXME PARAM
    LM_ORDER = CONTEXT + 1

    with open(CHAR_CORPUS_VOCAB_FILE, 'rb') as fin:
        char_inds = pickle.load(fin)
    chars = dict((v, k) for k, v in char_inds.iteritems())
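
This sampling snippet renormalizes the model's output distribution, draws one character index with np.random.choice, and maps it back through the index-to-character dictionary. A self-contained sketch of the same step (sample_char and the toy vocabulary are illustrative, not the repository's API):

import numpy as np

def sample_char(probs, chars):
    # Renormalize to guard against numerical drift, then draw an index.
    probs = np.asarray(probs, dtype=np.float64)
    probs = probs / probs.sum()
    w = np.random.choice(len(probs), p=probs)
    return chars[w]

# Toy 3-character vocabulary: index -> character.
print(sample_char([0.2, 0.5, 0.3], {0: 'a', 1: 'b', 2: 'c'}))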
Example #3
File: rnn.py  Project: comadan/nn
                        dhs[k][t-1] = mult(Whh.T, dus[k][t])
                    self.grads['Whh'] += mult(dus[k][t], hprev.T) / bsize
                    self.grads['bhh'] += dus[k][t].sum(axis=-1).reshape((-1, 1)) / bsize

            self.grads['Wih'] += mult(dus[0][t], data[:, t, :].T) / bsize
            self.grads['bih'] += dus[0][t].sum(axis=-1).reshape((-1, 1)) / bsize

        return cost, self.grads


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    model_hps = RNNHyperparams()
    model_hps.hidden_size = 10
    opt_hps = OptimizerHyperparams()
    model_hps.add_to_argparser(parser)
    opt_hps.add_to_argparser(parser)

    args = parser.parse_args()

    model_hps.set_from_args(args)
    opt_hps.set_from_args(args)

    dset = UttCharStream(args.batch_size)

    # Construct network
    model = RNN(dset, model_hps, opt_hps, opt='nag')
    model.run(check_grad=True)
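
model.run(check_grad=True) points at a numerical check of the analytic backprop gradients computed above. A generic central-difference check over a flat parameter array, as a standalone sketch (loss_fn and the helper name are illustrative, not the RNN class's API):

import numpy as np

def finite_diff_check(loss_fn, params, analytic_grad, eps=1e-5):
    # Compare analytic gradients against central differences, entry by entry.
    numeric = np.zeros_like(params)
    for i in range(params.size):
        old = params.flat[i]
        params.flat[i] = old + eps
        cost_plus = loss_fn(params)
        params.flat[i] = old - eps
        cost_minus = loss_fn(params)
        params.flat[i] = old
        numeric.flat[i] = (cost_plus - cost_minus) / (2 * eps)
    rel_err = np.abs(numeric - analytic_grad) / (
        np.abs(numeric) + np.abs(analytic_grad) + 1e-12)
    return rel_err.max()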
Example #4
def runSeq(opts):
    fid = open(opts.out_file, 'w')
    phone_map = get_char_map(opts.dataDir)
    print phone_map
    print len(phone_map)

    alisDir = opts.alisDir if opts.alisDir else opts.dataDir
    loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim, alisDir)

    hyps = list()
    refs = list()
    hypscores = list()
    refscores = list()
    numphones = list()
    subsets = list()
    alignments = list()

    if MODEL_TYPE != 'ngram':
        cfg_file = '/deep/u/zxie/rnnlm/13/cfg.json'
        params_file = '/deep/u/zxie/rnnlm/13/params.pk'
        #cfg_file = '/deep/u/zxie/dnn/11/cfg.json'
        #params_file = '/deep/u/zxie/dnn/11/params.pk'

        cfg = load_config(cfg_file)
        model_class, model_hps = get_model_class_and_params(MODEL_TYPE)
        opt_hps = OptimizerHyperparams()
        model_hps.set_from_dict(cfg)
        opt_hps.set_from_dict(cfg)

        clm = model_class(None, model_hps, opt_hps, train=False, opt='nag')
        with open(params_file, 'rb') as fin:
            clm.from_file(fin)
    else:
        from srilm import LM
        from decoder_config import LM_ARPA_FILE
        print 'Loading %s...' % LM_ARPA_FILE
        clm = LM(LM_ARPA_FILE)
        print 'Done.'
    #clm = None

    for i in range(opts.start_file, opts.start_file + opts.numFiles):
        data_dict, alis, keys, _ = loader.loadDataFileDict(i)
        # For later alignments
        keys = sorted(keys)

        # For Switchboard filter
        if DATA_SUBSET == 'eval2000':
            if SWBD_SUBSET == 'swbd':
                keys = [k for k in keys if k.startswith('sw')]
            elif SWBD_SUBSET == 'callhome':
                keys = [k for k in keys if k.startswith('en')]

        ll_file = pjoin(LIKELIHOODS_DIR, 'loglikelihoods_%d.pk' % i)
        with open(ll_file, 'rb') as ll_fid:
            probs_dict = pickle.load(ll_fid)

        # Parallelize decoding over utterances
        print 'Decoding utterances in parallel, n_jobs=%d, file=%d' % (
            NUM_CPUS, i)
        decoded_utts = Parallel(n_jobs=NUM_CPUS)(delayed(decode_utterance)(
            k, probs_dict[k], alis[k], phone_map, lm=clm) for k in keys)

        for k, (hyp, ref, hypscore, refscore,
                align) in zip(keys, decoded_utts):
            if refscore is None:
                refscore = 0.0
            if hypscore is None:
                hypscore = 0.0
            hyp = replace_contractions(hyp)
            fid.write(k + ' ' + ' '.join(hyp) + '\n')

            hyps.append(hyp)
            refs.append(ref)
            hypscores.append(hypscore)
            refscores.append(refscore)
            numphones.append(len(alis[k]))
            subsets.append('callhm' if k.startswith('en') else 'swbd')
            alignments.append(align)

    fid.close()

    # Pickle some values for computeStats.py
    pkid = open(opts.out_file.replace('.txt', '.pk'), 'wb')
    pickle.dump(hyps, pkid)
    pickle.dump(refs, pkid)
    pickle.dump(hypscores, pkid)
    pickle.dump(refscores, pkid)
    pickle.dump(numphones, pkid)
    pickle.dump(subsets, pkid)
    pickle.dump(alignments, pkid)
    pkid.close()
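
The decoder pickles seven objects back to back into a single file for computeStats.py, so the read side has to unpickle them in the same order. A minimal sketch of that read-back (the file name and variable names are illustrative):

import pickle

with open('decode_results.pk', 'rb') as pkid:
    hyps = pickle.load(pkid)
    refs = pickle.load(pkid)
    hypscores = pickle.load(pkid)
    refscores = pickle.load(pkid)
    numphones = pickle.load(pkid)
    subsets = pickle.load(pkid)
    alignments = pickle.load(pkid)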