def main(): # TODO Be able to pass in different models into training script as well? model_class, model_hps = get_model_class_and_params(MODEL_TYPE) opt_hps = OptimizerHyperparams() parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("epochs", type=int, help="number of epochs to train") parser.add_argument("--opt", default="nag", help="optimizer to use", choices=["cm", "nag"]) parser.add_argument("--anneal_factor", type=float, default=2.0, help="annealing factor after each epoch") parser.add_argument("out_dir", help="output directory to write model files") parser.add_argument("--cfg_file", help="cfg file for restarting run") model_hps.add_to_argparser(parser) opt_hps.add_to_argparser(parser) args = parser.parse_args() model_hps.set_from_args(args) opt_hps.set_from_args(args) cfg = args.__dict__.copy() if not cfg["cfg_file"]: cfg["cfg_file"] = pjoin(args.out_dir, "cfg.json") add_run_data(cfg) dump_config(cfg, cfg["cfg_file"]) # Load dataset # dataset = CharStream(CONTEXT, args.batch_size, step=1) dataset = UttCharStream(args.batch_size) # Construct network model = model_class(dataset, model_hps, opt_hps, opt=args.opt) # Run training for k in xrange(0, args.epochs): it = 0 while dataset.data_left(): model.run() if it % 1 == 0: logger.info( "epoch %d, iter %d, obj=%f, exp_obj=%f, gnorm=%f" % (k, it, model.opt.costs[-1], model.opt.expcosts[-1], model.opt.grad_norm) ) # gnp.memory_allocators() # print gnp.memory_in_use() it += 1 if it % SAVE_PARAMS_EVERY == 0: params_file = pjoin(args.out_dir, "params_save_every.pk") with open(params_file, "wb") as fout: model.to_file(fout) # Anneal model.opt.alpha /= args.anneal_factor # Save final parameters params_file = pjoin(args.out_dir, "params_epoch{0:02}.pk".format(k + 1)) with open(params_file, "wb") as fout: model.to_file(fout) # Symlink param file to latest sym_file = pjoin(args.out_dir, "params.pk") if os.path.exists(sym_file): os.remove(sym_file) os.symlink(params_file, sym_file) if k != args.epochs - 1: model.start_next_epoch()
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option('--cfg_file', dest='cfg_file', default=None, help='File with settings from previously trained net') parser.add_option("--test", action="store_true", dest="test", default=False) # Architecture parser.add_option("--layerSize", dest="layerSize", type="int", default=1824) parser.add_option("--numLayers", dest="numLayers", type="int", default=5) parser.add_option("--temporalLayer", dest="temporalLayer", type="int", default=3) # Optimization parser.add_option("--momentum", dest="momentum", type="float", default=0.95) parser.add_option("--epochs", dest="epochs", type="int", default=20) parser.add_option("--step", dest="step", type="float", default=1e-5) parser.add_option( "--anneal", dest="anneal", type="float", default=1.3, help="Sets (learning rate := learning rate / anneal) after each epoch." ) parser.add_option( '--reg', dest='reg', type='float', default=0.0, help='lambda for L2 regularization of the weight matrices') # Data parser.add_option("--dataDir", dest="dataDir", type="string", default=TRAIN_DATA_DIR['fbank']) parser.add_option('--alisDir', dest='alisDir', type='string', default=TRAIN_ALIS_DIR) parser.add_option('--startFile', dest='startFile', type='int', default=1, help='Start file for running testing') parser.add_option("--numFiles", dest="numFiles", type="int", default=384) parser.add_option("--inputDim", dest="inputDim", type="int", default=41 * 15) parser.add_option("--rawDim", dest="rawDim", type="int", default=41 * 15) parser.add_option("--outputDim", dest="outputDim", type="int", default=35) parser.add_option("--maxUttLen", dest="maxUttLen", type="int", default=MAX_UTT_LEN) # Save/Load parser.add_option( '--save_every', dest='save_every', type='int', default=10, help='During training, save parameters every x number of files') parser.add_option('--run_desc', dest='run_desc', type='string', default='', help='Description of experiment run') (opts, args) = parser.parse_args(args) if opts.cfg_file: cfg = load_config(opts.cfg_file) else: cfg = vars(opts) # These config values should be updated every time cfg['host'] = get_hostname() cfg['git_rev'] = get_git_revision() cfg['pid'] = os.getpid() # Create experiment output directory if not opts.cfg_file: time_string = str(TimeString()) output_dir = pjoin(RUN_DIR, time_string) cfg['output_dir'] = output_dir if not os.path.exists(output_dir): print 'Creating %s' % output_dir os.makedirs(output_dir) opts.cfg_file = pjoin(output_dir, 'cfg.json') else: output_dir = cfg['output_dir'] cfg['output_dir'] = output_dir cfg['in_file'] = pjoin(output_dir, 'params.pk') cfg['out_file'] = pjoin(output_dir, 'params.pk') cfg['test'] = opts.test if opts.test: cfg['dataDir'] = opts.dataDir cfg['numFiles'] = opts.numFiles cfg['startFile'] = opts.startFile if 'reg' not in cfg: cfg['reg'] = 0.0 # Logging logging.basicConfig(filename=pjoin(output_dir, 'train.log'), level=logging.DEBUG) logger = logging.getLogger() logger.addHandler(logging.StreamHandler()) logger.info('Running on %s' % cfg['host']) # seed for debugging, turn off when stable np.random.seed(33) import random random.seed(33) if 'CUDA_DEVICE' in os.environ: cm.cuda_set_device(int(os.environ['CUDA_DEVICE'])) else: cm.cuda_set_device(0) # Default opts = CfgStruct(**cfg) # Testing if opts.test: test(opts) return alisDir = opts.alisDir if opts.alisDir else opts.dataDir loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim, alisDir) nn = rnnet.NNet(opts.inputDim, opts.outputDim, opts.layerSize, opts.numLayers, opts.maxUttLen, temporalLayer=opts.temporalLayer, reg=opts.reg) nn.initParams() SGD = sgd.SGD(nn, opts.maxUttLen, alpha=opts.step, momentum=opts.momentum) # Dump config cfg['param_count'] = nn.paramCount() dump_config(cfg, opts.cfg_file) # Training epoch_file = pjoin(output_dir, 'epoch') if os.path.exists(epoch_file): start_epoch = int(open(epoch_file, 'r').read()) + 1 else: start_epoch = 0 # Load model if specified if os.path.exists(opts.in_file): with open(opts.in_file, 'r') as fid: SGD.fromFile(fid) SGD.alpha = SGD.alpha / (opts.anneal**start_epoch) nn.fromFile(fid) num_files_file = pjoin(output_dir, 'num_files') for k in range(start_epoch, opts.epochs): perm = np.random.permutation(opts.numFiles) + 1 loader.loadDataFileAsynch(perm[0]) file_start = 0 if k == start_epoch: if os.path.exists(num_files_file): file_start = int(open(num_files_file, 'r').read().strip()) logger.info('Starting from file %d, epoch %d' % (file_start, start_epoch)) else: open(num_files_file, 'w').write(str(file_start)) for i in xrange(file_start, perm.shape[0]): start = time.time() data_dict, alis, keys, sizes = loader.getDataAsynch() # Prefetch if i + 1 < perm.shape[0]: loader.loadDataFileAsynch(perm[i + 1]) SGD.run(data_dict, alis, keys, sizes) end = time.time() logger.info('File time %f' % (end - start)) # Save parameters and cost if (i + 1) % opts.save_every == 0: logger.info('Saving parameters') with open(opts.out_file, 'wb') as fid: SGD.toFile(fid) nn.toFile(fid) open(num_files_file, 'w').write('%d' % (i + 1)) logger.info('Done saving parameters') with open(pjoin(output_dir, 'last_cost'), 'w') as fid: if opts.reg > 0.0: fid.write(str(SGD.expcost[-1] - SGD.regcost[-1])) else: fid.write(str(SGD.expcost[-1])) # Save epoch completed open(pjoin(output_dir, 'epoch'), 'w').write(str(k)) # Save parameters for the epoch with open(opts.out_file + '.epoch{0:02}'.format(k), 'wb') as fid: SGD.toFile(fid) nn.toFile(fid) SGD.alpha = SGD.alpha / opts.anneal # Run now complete, touch sentinel file touch_file(pjoin(output_dir, 'sentinel'))
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option('--cfg_file', dest='cfg_file', default=None, help='File with settings from previously trained net') parser.add_option( "--test", action="store_true", dest="test", default=False) # Architecture parser.add_option( "--layerSize", dest="layerSize", type="int", default=1824) parser.add_option("--numLayers", dest="numLayers", type="int", default=5) parser.add_option( "--temporalLayer", dest="temporalLayer", type="int", default=3) # Optimization parser.add_option("--momentum", dest="momentum", type="float", default=0.95) parser.add_option("--epochs", dest="epochs", type="int", default=20) parser.add_option("--step", dest="step", type="float", default=1e-5) parser.add_option("--anneal", dest="anneal", type="float", default=1.3, help="Sets (learning rate := learning rate / anneal) after each epoch.") parser.add_option('--reg', dest='reg', type='float', default=0.0, help='lambda for L2 regularization of the weight matrices') # Data parser.add_option("--dataDir", dest="dataDir", type="string", default=TRAIN_DATA_DIR['fbank']) parser.add_option('--alisDir', dest='alisDir', type='string', default=TRAIN_ALIS_DIR) parser.add_option('--startFile', dest='startFile', type='int', default=1, help='Start file for running testing') parser.add_option("--numFiles", dest="numFiles", type="int", default=384) parser.add_option( "--inputDim", dest="inputDim", type="int", default=41 * 15) parser.add_option("--rawDim", dest="rawDim", type="int", default=41 * 15) parser.add_option("--outputDim", dest="outputDim", type="int", default=35) parser.add_option( "--maxUttLen", dest="maxUttLen", type="int", default=MAX_UTT_LEN) # Save/Load parser.add_option('--save_every', dest='save_every', type='int', default=10, help='During training, save parameters every x number of files') parser.add_option('--run_desc', dest='run_desc', type='string', default='', help='Description of experiment run') (opts, args) = parser.parse_args(args) if opts.cfg_file: cfg = load_config(opts.cfg_file) else: cfg = vars(opts) # These config values should be updated every time cfg['host'] = get_hostname() cfg['git_rev'] = get_git_revision() cfg['pid'] = os.getpid() # Create experiment output directory if not opts.cfg_file: time_string = str(TimeString()) output_dir = pjoin(RUN_DIR, time_string) cfg['output_dir'] = output_dir if not os.path.exists(output_dir): print 'Creating %s' % output_dir os.makedirs(output_dir) opts.cfg_file = pjoin(output_dir, 'cfg.json') else: output_dir = cfg['output_dir'] cfg['output_dir'] = output_dir cfg['in_file'] = pjoin(output_dir, 'params.pk') cfg['out_file'] = pjoin(output_dir, 'params.pk') cfg['test'] = opts.test if opts.test: cfg['dataDir'] = opts.dataDir cfg['numFiles'] = opts.numFiles cfg['startFile'] = opts.startFile if 'reg' not in cfg: cfg['reg'] = 0.0 # Logging logging.basicConfig(filename=pjoin(output_dir, 'train.log'), level=logging.DEBUG) logger = logging.getLogger() logger.addHandler(logging.StreamHandler()) logger.info('Running on %s' % cfg['host']) # seed for debugging, turn off when stable np.random.seed(33) import random random.seed(33) if 'CUDA_DEVICE' in os.environ: cm.cuda_set_device(int(os.environ['CUDA_DEVICE'])) else: cm.cuda_set_device(0) # Default opts = CfgStruct(**cfg) # Testing if opts.test: test(opts) return alisDir = opts.alisDir if opts.alisDir else opts.dataDir loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim, alisDir) nn = rnnet.NNet(opts.inputDim, opts.outputDim, opts.layerSize, opts.numLayers, opts.maxUttLen, temporalLayer=opts.temporalLayer, reg=opts.reg) nn.initParams() SGD = sgd.SGD(nn, opts.maxUttLen, alpha=opts.step, momentum=opts.momentum) # Dump config cfg['param_count'] = nn.paramCount() dump_config(cfg, opts.cfg_file) # Training epoch_file = pjoin(output_dir, 'epoch') if os.path.exists(epoch_file): start_epoch = int(open(epoch_file, 'r').read()) + 1 else: start_epoch = 0 # Load model if specified if os.path.exists(opts.in_file): with open(opts.in_file, 'r') as fid: SGD.fromFile(fid) SGD.alpha = SGD.alpha / (opts.anneal ** start_epoch) nn.fromFile(fid) num_files_file = pjoin(output_dir, 'num_files') for k in range(start_epoch, opts.epochs): perm = np.random.permutation(opts.numFiles) + 1 loader.loadDataFileAsynch(perm[0]) file_start = 0 if k == start_epoch: if os.path.exists(num_files_file): file_start = int(open(num_files_file, 'r').read().strip()) logger.info('Starting from file %d, epoch %d' % (file_start, start_epoch)) else: open(num_files_file, 'w').write(str(file_start)) for i in xrange(file_start, perm.shape[0]): start = time.time() data_dict, alis, keys, sizes = loader.getDataAsynch() # Prefetch if i + 1 < perm.shape[0]: loader.loadDataFileAsynch(perm[i + 1]) SGD.run(data_dict, alis, keys, sizes) end = time.time() logger.info('File time %f' % (end - start)) # Save parameters and cost if (i+1) % opts.save_every == 0: logger.info('Saving parameters') with open(opts.out_file, 'wb') as fid: SGD.toFile(fid) nn.toFile(fid) open(num_files_file, 'w').write('%d' % (i+1)) logger.info('Done saving parameters') with open(pjoin(output_dir, 'last_cost'), 'w') as fid: if opts.reg > 0.0: fid.write(str(SGD.expcost[-1] - SGD.regcost[-1])) else: fid.write(str(SGD.expcost[-1])) # Save epoch completed open(pjoin(output_dir, 'epoch'), 'w').write(str(k)) # Save parameters for the epoch with open(opts.out_file + '.epoch{0:02}'.format(k), 'wb') as fid: SGD.toFile(fid) nn.toFile(fid) SGD.alpha = SGD.alpha / opts.anneal # Run now complete, touch sentinel file touch_file(pjoin(output_dir, 'sentinel'))
def main(): # TODO Be able to pass in different models into training script as well? model_class, model_hps = get_model_class_and_params(MODEL_TYPE) opt_hps = OptimizerHyperparams() parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('epochs', type=int, help='number of epochs to train') parser.add_argument('--opt', default='nag', help='optimizer to use', choices=['cm', 'nag']) parser.add_argument('--anneal_factor', type=float, default=2.0, help='annealing factor after each epoch') parser.add_argument('out_dir', help='output directory to write model files') parser.add_argument('--cfg_file', help='cfg file for restarting run') model_hps.add_to_argparser(parser) opt_hps.add_to_argparser(parser) args = parser.parse_args() model_hps.set_from_args(args) opt_hps.set_from_args(args) cfg = args.__dict__.copy() if not cfg['cfg_file']: cfg['cfg_file'] = pjoin(args.out_dir, 'cfg.json') add_run_data(cfg) dump_config(cfg, cfg['cfg_file']) # Load dataset #dataset = CharStream(CONTEXT, args.batch_size, step=1) dataset = UttCharStream(args.batch_size) # Construct network model = model_class(dataset, model_hps, opt_hps, opt=args.opt) # Run training for k in xrange(0, args.epochs): it = 0 while dataset.data_left(): model.run() if it % 1 == 0: logger.info('epoch %d, iter %d, obj=%f, exp_obj=%f, gnorm=%f' % (k, it, model.opt.costs[-1], model.opt.expcosts[-1], model.opt.grad_norm)) #gnp.memory_allocators() #print gnp.memory_in_use() it += 1 if it % SAVE_PARAMS_EVERY == 0: params_file = pjoin(args.out_dir, 'params_save_every.pk') with open(params_file, 'wb') as fout: model.to_file(fout) # Anneal model.opt.alpha /= args.anneal_factor # Save final parameters params_file = pjoin(args.out_dir, 'params_epoch{0:02}.pk'.format(k+1)) with open(params_file, 'wb') as fout: model.to_file(fout) # Symlink param file to latest sym_file = pjoin(args.out_dir, 'params.pk') if os.path.exists(sym_file): os.remove(sym_file) os.symlink(params_file, sym_file) if k != args.epochs - 1: model.start_next_epoch()