Example #1
File: train.py  Project: xiamike/nn
def main():
    # TODO Be able to pass in different models into training script as well?

    model_class, model_hps = get_model_class_and_params(MODEL_TYPE)
    opt_hps = OptimizerHyperparams()

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("epochs", type=int, help="number of epochs to train")
    parser.add_argument("--opt", default="nag", help="optimizer to use", choices=["cm", "nag"])
    parser.add_argument("--anneal_factor", type=float, default=2.0, help="annealing factor after each epoch")
    parser.add_argument("out_dir", help="output directory to write model files")
    parser.add_argument("--cfg_file", help="cfg file for restarting run")
    model_hps.add_to_argparser(parser)
    opt_hps.add_to_argparser(parser)
    args = parser.parse_args()

    model_hps.set_from_args(args)
    opt_hps.set_from_args(args)
    cfg = args.__dict__.copy()
    if not cfg["cfg_file"]:
        cfg["cfg_file"] = pjoin(args.out_dir, "cfg.json")
    add_run_data(cfg)
    dump_config(cfg, cfg["cfg_file"])

    # Load dataset
    # dataset = CharStream(CONTEXT, args.batch_size, step=1)
    dataset = UttCharStream(args.batch_size)

    # Construct network
    model = model_class(dataset, model_hps, opt_hps, opt=args.opt)

    # Run training
    for k in xrange(0, args.epochs):
        it = 0
        while dataset.data_left():
            model.run()

            if it % 1 == 0:  # modulus of 1 logs every iteration; raise it to log less often
                logger.info(
                    "epoch %d, iter %d, obj=%f, exp_obj=%f, gnorm=%f"
                    % (k, it, model.opt.costs[-1], model.opt.expcosts[-1], model.opt.grad_norm)
                )
                # gnp.memory_allocators()
                # print gnp.memory_in_use()
            it += 1
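            # Rolling checkpoint, overwritten in place; per-epoch snapshots are written below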
            if it % SAVE_PARAMS_EVERY == 0:
                params_file = pjoin(args.out_dir, "params_save_every.pk")
                with open(params_file, "wb") as fout:
                    model.to_file(fout)

        # Anneal
        model.opt.alpha /= args.anneal_factor

        # Save final parameters
        params_file = pjoin(args.out_dir, "params_epoch{0:02}.pk".format(k + 1))
        with open(params_file, "wb") as fout:
            model.to_file(fout)

        # Symlink param file to latest
        sym_file = pjoin(args.out_dir, "params.pk")
        if os.path.exists(sym_file):
            os.remove(sym_file)
        os.symlink(params_file, sym_file)

        if k != args.epochs - 1:
            model.start_next_epoch()
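
The epoch loop above ends by writing an epoch-stamped parameter file and repointing a params.pk symlink at it, so downstream tools can always load the latest checkpoint from one fixed path. Below is a minimal, self-contained sketch of that pattern; checkpoint and the raw bytes payload are hypothetical stand-ins for model.to_file, which pickles the actual model state.

import os
from os.path import join as pjoin

def checkpoint(out_dir, epoch, payload):
    # Write the epoch-stamped parameter file, e.g. params_epoch01.pk
    params_file = pjoin(out_dir, 'params_epoch{0:02}.pk'.format(epoch + 1))
    with open(params_file, 'wb') as fout:
        fout.write(payload)

    # Repoint the 'latest' symlink. os.symlink refuses to overwrite, so any
    # existing link is removed first; lexists (unlike exists) also catches a
    # dangling link whose target checkpoint has been deleted.
    sym_file = pjoin(out_dir, 'params.pk')
    if os.path.lexists(sym_file):
        os.remove(sym_file)
    os.symlink(params_file, sym_file)
    return params_file

Note that the original code tests os.path.exists before removing; if params.pk were ever left dangling, exists would return False and os.symlink would then fail, which is why the sketch uses os.path.lexists instead.
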
Example #2
def run(args=None):
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option('--cfg_file',
                      dest='cfg_file',
                      default=None,
                      help='File with settings from previously trained net')

    parser.add_option("--test",
                      action="store_true",
                      dest="test",
                      default=False)

    # Architecture
    parser.add_option("--layerSize",
                      dest="layerSize",
                      type="int",
                      default=1824)
    parser.add_option("--numLayers", dest="numLayers", type="int", default=5)
    parser.add_option("--temporalLayer",
                      dest="temporalLayer",
                      type="int",
                      default=3)

    # Optimization
    parser.add_option("--momentum",
                      dest="momentum",
                      type="float",
                      default=0.95)
    parser.add_option("--epochs", dest="epochs", type="int", default=20)
    parser.add_option("--step", dest="step", type="float", default=1e-5)
    parser.add_option(
        "--anneal",
        dest="anneal",
        type="float",
        default=1.3,
        help="Sets (learning rate := learning rate / anneal) after each epoch."
    )
    parser.add_option(
        '--reg',
        dest='reg',
        type='float',
        default=0.0,
        help='lambda for L2 regularization of the weight matrices')

    # Data
    parser.add_option("--dataDir",
                      dest="dataDir",
                      type="string",
                      default=TRAIN_DATA_DIR['fbank'])
    parser.add_option('--alisDir',
                      dest='alisDir',
                      type='string',
                      default=TRAIN_ALIS_DIR)
    parser.add_option('--startFile',
                      dest='startFile',
                      type='int',
                      default=1,
                      help='Start file for running testing')
    parser.add_option("--numFiles", dest="numFiles", type="int", default=384)
    parser.add_option("--inputDim",
                      dest="inputDim",
                      type="int",
                      default=41 * 15)
    parser.add_option("--rawDim", dest="rawDim", type="int", default=41 * 15)
    parser.add_option("--outputDim", dest="outputDim", type="int", default=35)
    parser.add_option("--maxUttLen",
                      dest="maxUttLen",
                      type="int",
                      default=MAX_UTT_LEN)

    # Save/Load
    parser.add_option(
        '--save_every',
        dest='save_every',
        type='int',
        default=10,
        help='During training, save parameters every x number of files')

    parser.add_option('--run_desc',
                      dest='run_desc',
                      type='string',
                      default='',
                      help='Description of experiment run')

    (opts, args) = parser.parse_args(args)

    if opts.cfg_file:
        cfg = load_config(opts.cfg_file)
    else:
        cfg = vars(opts)

    # These config values should be updated every time
    cfg['host'] = get_hostname()
    cfg['git_rev'] = get_git_revision()
    cfg['pid'] = os.getpid()

    # Create experiment output directory

    if not opts.cfg_file:
        time_string = str(TimeString())
        output_dir = pjoin(RUN_DIR, time_string)
        cfg['output_dir'] = output_dir
        if not os.path.exists(output_dir):
            print 'Creating %s' % output_dir
            os.makedirs(output_dir)
        opts.cfg_file = pjoin(output_dir, 'cfg.json')
    else:
        output_dir = cfg['output_dir']

    cfg['output_dir'] = output_dir
    cfg['in_file'] = pjoin(output_dir, 'params.pk')
    cfg['out_file'] = pjoin(output_dir, 'params.pk')
    cfg['test'] = opts.test
    if opts.test:
        cfg['dataDir'] = opts.dataDir
        cfg['numFiles'] = opts.numFiles
        cfg['startFile'] = opts.startFile
    if 'reg' not in cfg:
        cfg['reg'] = 0.0

    # Logging

    logging.basicConfig(filename=pjoin(output_dir, 'train.log'),
                        level=logging.DEBUG)
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.info('Running on %s' % cfg['host'])

    # seed for debugging, turn off when stable
    np.random.seed(33)
    import random
    random.seed(33)

    if 'CUDA_DEVICE' in os.environ:
        cm.cuda_set_device(int(os.environ['CUDA_DEVICE']))
    else:
        cm.cuda_set_device(0)  # Default

    opts = CfgStruct(**cfg)

    # Testing
    if opts.test:
        test(opts)
        return

    alisDir = opts.alisDir if opts.alisDir else opts.dataDir
    loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim, alisDir)

    nn = rnnet.NNet(opts.inputDim,
                    opts.outputDim,
                    opts.layerSize,
                    opts.numLayers,
                    opts.maxUttLen,
                    temporalLayer=opts.temporalLayer,
                    reg=opts.reg)
    nn.initParams()

    SGD = sgd.SGD(nn, opts.maxUttLen, alpha=opts.step, momentum=opts.momentum)

    # Dump config
    cfg['param_count'] = nn.paramCount()
    dump_config(cfg, opts.cfg_file)

    # Training
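    # The 'epoch' file records the last *completed* epoch; a restarted run resumes at the next one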
    epoch_file = pjoin(output_dir, 'epoch')
    if os.path.exists(epoch_file):
        start_epoch = int(open(epoch_file, 'r').read()) + 1
    else:
        start_epoch = 0

    # Load model if specified
    if os.path.exists(opts.in_file):
        with open(opts.in_file, 'rb') as fid:  # parameters were written in binary ('wb')
            SGD.fromFile(fid)
            SGD.alpha = SGD.alpha / (opts.anneal**start_epoch)
            nn.fromFile(fid)

    num_files_file = pjoin(output_dir, 'num_files')

    for k in range(start_epoch, opts.epochs):
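        # Shuffle the file visit order each epoch; +1 because data files are numbered from 1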
        perm = np.random.permutation(opts.numFiles) + 1
        loader.loadDataFileAsynch(perm[0])

        file_start = 0
        if k == start_epoch:
            if os.path.exists(num_files_file):
                file_start = int(open(num_files_file, 'r').read().strip())
                logger.info('Starting from file %d, epoch %d' %
                            (file_start, start_epoch))
        else:
            open(num_files_file, 'w').write(str(file_start))

        for i in xrange(file_start, perm.shape[0]):
            start = time.time()
            data_dict, alis, keys, sizes = loader.getDataAsynch()
            # Prefetch the next file asynchronously so disk I/O overlaps with the SGD step
            if i + 1 < perm.shape[0]:
                loader.loadDataFileAsynch(perm[i + 1])
            SGD.run(data_dict, alis, keys, sizes)
            end = time.time()
            logger.info('File time %f' % (end - start))

            # Save parameters and cost
            if (i + 1) % opts.save_every == 0:
                logger.info('Saving parameters')
                with open(opts.out_file, 'wb') as fid:
                    SGD.toFile(fid)
                    nn.toFile(fid)
                    open(num_files_file, 'w').write('%d' % (i + 1))
                logger.info('Done saving parameters')
                with open(pjoin(output_dir, 'last_cost'), 'w') as fid:
                    if opts.reg > 0.0:
                        fid.write(str(SGD.expcost[-1] - SGD.regcost[-1]))
                    else:
                        fid.write(str(SGD.expcost[-1]))

        # Save epoch completed
        open(pjoin(output_dir, 'epoch'), 'w').write(str(k))

        # Save parameters for the epoch
        with open(opts.out_file + '.epoch{0:02}'.format(k), 'wb') as fid:
            SGD.toFile(fid)
            nn.toFile(fid)

        SGD.alpha = SGD.alpha / opts.anneal

    # Run now complete, touch sentinel file
    touch_file(pjoin(output_dir, 'sentinel'))
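
Example #2 persists three pieces of restart state into output_dir: an epoch file holding the last completed epoch, a num_files file counting data files finished within the current epoch, and the pickled parameters themselves. A restarted run must also replay the per-epoch annealing so the learning rate matches where the interrupted run stopped (SGD.alpha = SGD.alpha / opts.anneal ** start_epoch above). A minimal sketch of that resume logic follows, with resume_state as a hypothetical helper rather than part of the original project.

import os
from os.path import join as pjoin

def resume_state(output_dir, base_alpha, anneal):
    # Epoch k in the file means epoch k completed; resume at k + 1.
    start_epoch = 0
    epoch_file = pjoin(output_dir, 'epoch')
    if os.path.exists(epoch_file):
        with open(epoch_file) as fid:
            start_epoch = int(fid.read()) + 1

    # Within the resumed epoch, skip the data files already consumed.
    file_start = 0
    num_files_file = pjoin(output_dir, 'num_files')
    if os.path.exists(num_files_file):
        with open(num_files_file) as fid:
            file_start = int(fid.read().strip())

    # Replay the per-epoch annealing: alpha_k = base_alpha / anneal**k.
    alpha = base_alpha / (anneal ** start_epoch)
    return start_epoch, file_start, alpha

For instance, resume_state(output_dir, 1e-5, 1.3) after two completed epochs returns (2, 0, 1e-5 / 1.69), matching the step size the run would have reached without the interruption.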