def test_resume_training(
        configs: dict[str, Any],
        make_plots: bool = True,
        **kwargs,
) -> TestOutputs:
    """Test restoring a training session from a checkpoint."""
    t0 = time.time()
    logger.info('Testing resuming training')
    configs_ = copy.deepcopy(configs)
    assert configs_.get('restore_from', None) is not None
    train_out = train(configs_, make_plots=make_plots,
                      verbose=False, num_chains=4, **kwargs)
    dynamics = train_out.dynamics
    logdir = train_out.logdir
    x = train_out.x
    runs_dir = os.path.join(logdir, 'inference')
    run_out = None
    if RANK == 0:
        run_out = run(dynamics, configs_, x=x, runs_dir=runs_dir)

    logger.info(f'Passed! Took: {time.time() - t0:.4f} seconds')

    return TestOutputs(train_out, run_out)
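
# Hypothetical usage sketch (path and values are assumptions, not part of the
# test suite): `configs` is the same dict handed to `train`, with 'restore_from'
# pointing at an existing checkpoint directory, e.g.
#
#   configs['restore_from'] = '/path/to/previous/logdir'
#   outputs = test_resume_training(configs, make_plots=False)
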

def test_separate_networks(
        configs: dict[str, Any],
        make_plots: bool = True,
        **kwargs,
) -> TestOutputs:
    """Test training on separate networks."""
    t0 = time.time()
    logger.info('Testing separate networks')
    configs_ = dict(copy.deepcopy(configs))
    configs_['dynamics_config']['separate_networks'] = True
    train_out = train(configs_, make_plots=make_plots,
                      verbose=False, num_chains=4, **kwargs)
    x = train_out.x
    dynamics = train_out.dynamics
    logdir = train_out.logdir
    runs_dir = os.path.join(logdir, 'inference')
    run_out = None
    if RANK == 0:
        run_out = run(dynamics, configs_, x=x,
                      runs_dir=runs_dir, make_plots=make_plots)

    logger.info(f'Passed! Took: {time.time() - t0:.4f} seconds')

    return TestOutputs(train_out, run_out)

def main(configs: dict[str, Any]):
    """Main method for training."""
    # tf.keras.backend.set_floatx('float32')
    import numpy as np

    custom_betas = None
    if configs.get('discrete_beta', False):
        # Build a step-wise beta schedule that holds each integer beta in
        # [beta_init, beta_final] fixed for an equal number of training steps.
        b0 = configs.get('beta_init', None)   # type: float
        b1 = configs.get('beta_final', None)  # type: float
        db = b1 - b0
        # per_step = (b1 - b0) // configs.get('train_steps', None)
        per_step = int(configs.get('train_steps', None) // (b1 + 1 - b0))
        custom_betas = []
        for b in range(int(b0), int(b1 + 1)):
            betas_ = b * np.ones(per_step)
            custom_betas.append(betas_)

        custom_betas = np.stack(np.array(custom_betas))
        custom_betas = tf.convert_to_tensor(custom_betas.flatten(),
                                            dtype=tf.keras.backend.floatx())
        logger.info('Using discrete betas!!!')
        logger.info(f'custom_betas: {custom_betas}')

    # -- Train model ----------------------------------------------------
    train_out = train(configs=configs, make_plots=True,
                      custom_betas=custom_betas)
    x = train_out.x
    dynamics = train_out.dynamics
    configs = train_out.configs
    # --------------------------------------------------------------------

    # -- Run inference on trained model ----------------------------------
    run_steps = configs.get('run_steps', 20000)
    if run_steps > 0:
        x = tf.random.uniform(x.shape, *(-PI, PI))
        beta = configs.get('beta_final')
        nchains = configs.get('num_chains', configs.get('nchains', None))
        if nchains is not None:
            x = x[:nchains]

        # if configs.get('small_batch', False):
        #     batch_size = 256
        #     old_shape = configs['dynamics_config']['x_shape']
        #     new_shape = (batch_size, *old_shape[1:])
        #     configs['dynamics_config']['x_shape'] = new_shape
        #     dynamics = build_dynamics(configs)
        #     x = x[:batch_size]

        _ = run(dynamics, configs, x, beta=beta, make_plots=True,
                therm_frac=0.1, num_chains=nchains, save_x=False)
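
# Worked example of the discrete-beta schedule above (hypothetical values, not
# from the source): with beta_init=1, beta_final=4 and train_steps=1000,
#   per_step = 1000 // (4 + 1 - 1) = 250,
# so the flattened schedule runs 250 steps each at beta = 1, 2, 3, 4.
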

def main(configs: dict[str, Any], **kwargs):
    """Train a model and, on rank 0, run inference with the trained dynamics."""
    t0 = time.time()
    train_out = train(configs, **kwargs)
    run_out = None
    if RANK == 0:
        run_out = run(train_out.dynamics, configs, make_plots=True,
                      runs_dir=os.path.join(train_out.logdir, 'inference'))

    logger.info(f'Passed! Took: {time.time() - t0:.4f} seconds')

    return TestOutputs(train_out, run_out)

def test_single_network(flags: AttrDict):
    """Test training on single network."""
    flags.dynamics_config.separate_networks = False
    x, dynamics, train_data, flags = train(flags)
    beta = flags.get('beta', 1.)
    dynamics, run_data, x = run(dynamics, flags, x=x)

    return AttrDict({
        'x': x,
        'flags': flags,
        'log_dir': flags.log_dir,
        'dynamics': dynamics,
        'run_data': run_data,
        'train_data': train_data,
    })

def test_resume_training(log_dir: str):
    """Test restoring a training session from a checkpoint."""
    flags = AttrDict(
        dict(io.loadz(os.path.join(log_dir, 'training', 'FLAGS.z'))))
    flags.log_dir = log_dir
    # Extend training by another `train_steps` steps past the restored checkpoint
    flags.train_steps += flags.get('train_steps', 10)
    x, dynamics, train_data, flags = train(flags)
    beta = flags.get('beta', 1.)
    dynamics, run_data, x = run(dynamics, flags, x=x)

    return AttrDict({
        'x': x,
        'flags': flags,
        'log_dir': flags.log_dir,
        'dynamics': dynamics,
        'run_data': run_data,
        'train_data': train_data,
    })

def test_separate_networks(flags: AttrDict):
    """Test training on separate networks."""
    flags.hmc_steps = 0
    # flags.log_dir = None
    flags.log_dir = io.make_log_dir(flags, 'GaugeModel', LOG_FILE)
    flags.dynamics_config.separate_networks = True
    flags.compile = False
    x, dynamics, train_data, flags = train(flags)
    beta = flags.get('beta', 1.)
    dynamics, run_data, x = run(dynamics, flags, x=x)

    return AttrDict({
        'x': x,
        'flags': flags,
        'log_dir': flags.log_dir,
        'dynamics': dynamics,
        'run_data': run_data,
        'train_data': train_data,
    })

def run_from_log_dir(log_dir: str, net_weights: NetWeights, run_steps=5000):
    """Run inference using networks restored from `log_dir` with the given `net_weights`."""
    configs = load_configs_from_log_dir(log_dir)
    if 'x_shape' not in configs['dynamics_config'].keys():
        x_shape = configs['dynamics_config'].pop('lattice_shape')
        configs['dynamics_config']['x_shape'] = x_shape

    beta = configs['beta_final']
    nwstr = 'nw' + ''.join([f'{int(i)}' for i in net_weights])
    run_dir = os.path.join(PROJECT_DIR, 'l2hmc_function_tests', 'inference',
                           f'beta{beta}', f'{nwstr}')
    if os.path.isdir(run_dir):
        io.log(f'EXISTING RUN FOUND AT: {run_dir}, SKIPPING!',
               style='bold red')

    io.check_else_make_dir(run_dir)
    log_dir = configs.get('log_dir', None)
    configs['log_dir_orig'] = log_dir
    configs['log_dir'] = run_dir
    configs['run_steps'] = run_steps
    configs = AttrDict(configs)
    dynamics = build_dynamics(configs)
    xnet, vnet = dynamics._load_networks(log_dir)
    dynamics.xnet = xnet
    dynamics.vnet = vnet

    io.log(f'Original dynamics.net_weights: {dynamics.net_weights}')
    io.log(f'Setting `dynamics.net_weights` to: {net_weights}')
    dynamics._set_net_weights(net_weights)
    dynamics.net_weights = net_weights
    io.log(f'Now, dynamics.net_weights: {dynamics.net_weights}')

    dynamics, train_data, x = short_training(1000, beta, log_dir=log_dir,
                                             dynamics=dynamics, x=None)
    inference_results = run(dynamics, configs, beta=beta, runs_dir=run_dir,
                            md_steps=500, make_plots=True, therm_frac=0.2,
                            num_chains=16)

    return inference_results
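
# Hypothetical usage sketch (the path, and the number and order of NetWeights
# entries, are assumptions): `net_weights` is an iterable of numeric weights,
# one per network output term, e.g. all ones to leave the trained networks
# unscaled:
#
#   results = run_from_log_dir('/path/to/logdir',
#                              NetWeights(1., 1., 1., 1., 1., 1.))
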

def test_conv_net(
        configs: dict[str, Any],
        make_plots: bool = True,
        **kwargs,
) -> TestOutputs:
    """Test convolutional networks."""
    t0 = time.time()
    logger.info('Testing convolutional network')
    configs = AttrDict(**dict(copy.deepcopy(configs)))
    # flags.use_conv_net = True
    configs['dynamics_config']['use_conv_net'] = True
    # configs['conv_config'] = dict(
    #     sizes=[2, 2],
    #     filters=[4, 8],
    #     pool_sizes=[2, 2],
    #     use_batch_norm=True,
    #     conv_paddings=['valid', 'valid'],
    #     conv_activations=['relu', 'relu'],
    #     input_shape=configs['dynamics_config']['x_shape'][1:],
    # )
    train_out = train(configs, make_plots=make_plots,
                      num_chains=4, verbose=False, **kwargs)
    runs_dir = os.path.join(train_out.logdir, 'inference')
    run_out = None
    if RANK == 0:
        run_out = run(train_out.dynamics, configs, x=train_out.x,
                      runs_dir=runs_dir, make_plots=make_plots)

    logger.info(f'Passed! Took: {time.time() - t0:.4f} seconds')

    return TestOutputs(train_out, run_out)

def test_conv_net(flags: AttrDict):
    """Test convolutional networks."""
    # flags.use_conv_net = True
    flags['dynamics_config']['use_conv_net'] = True
    flags.conv_config = ConvolutionConfig(
        sizes=[2, 2],
        filters=[16, 32],
        pool_sizes=[2, 2],
        use_batch_norm=True,
        conv_paddings=['valid', 'valid'],
        conv_activations=['relu', 'relu'],
        input_shape=flags['dynamics_config']['lattice_shape'][1:],
    )
    x, dynamics, train_data, flags = train(flags)
    dynamics, run_data, x = run(dynamics, flags, x=x)

    return AttrDict({
        'x': x,
        'flags': flags,
        'log_dir': flags.log_dir,
        'dynamics': dynamics,
        'run_data': run_data,
        'train_data': train_data,
    })

def main(args):
    """Main method for training."""
    hmc_steps = args.get('hmc_steps', 0)
    tf.keras.backend.set_floatx('float32')
    log_file = os.path.join(os.getcwd(), 'log_dirs.txt')

    x = None
    log_dir = args.get('log_dir', None)
    beta_init = args.get('beta_init', None)
    beta_final = args.get('beta_final', None)
    if log_dir is not None:  # we want to restore from latest checkpoint
        train_steps = args.get('train_steps', None)
        args = restore_flags(args, os.path.join(args.log_dir, 'training'))
        args.train_steps = train_steps  # use newly passed value
        args.restore = True
        if beta_init != args.get('beta_init', None):
            args.beta_init = beta_init
        if beta_final != args.get('beta_final', None):
            args.beta_final = beta_final
        args.train_steps = train_steps

    else:  # New training session
        timestamps = AttrDict({
            'month': io.get_timestamp('%Y_%m'),
            'time': io.get_timestamp('%Y-%m-%d-%H%M%S'),
            'hour': io.get_timestamp('%Y-%m-%d-%H'),
            'minute': io.get_timestamp('%Y-%m-%d-%H%M'),
            'second': io.get_timestamp('%Y-%m-%d-%H%M%S'),
        })
        args.log_dir = io.make_log_dir(args, 'GaugeModel', log_file,
                                       timestamps=timestamps)
        io.write(f'{args.log_dir}', log_file, 'a')
        args.restore = False
        if hmc_steps > 0:
            x, _, eps = train_hmc(args)
            args.dynamics_config['eps'] = eps

    dynamics_config = args.get('dynamics_config', None)
    if dynamics_config is not None:
        log_dir = dynamics_config.get('log_dir', None)
        if log_dir is not None:
            eps_file = os.path.join(log_dir, 'training', 'models', 'eps.z')
            if os.path.isfile(eps_file):
                io.log(f'Loading eps from: {eps_file}')
                eps = io.loadz(eps_file)
                args.dynamics_config['eps'] = eps

    _, dynamics, _, args = train(args, x=x)

    # ====
    # Run inference on trained model
    if args.get('run_steps', 5000) > 0:
        # ====
        # Run with random start
        dynamics, _, _ = run(dynamics, args)

        # ====
        # Run HMC
        args.hmc = True
        args.dynamics_config['eps'] = 0.15
        hmc_dir = os.path.join(args.log_dir, 'inference_hmc')
        _ = run_hmc(args=args, hmc_dir=hmc_dir)
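
# Minimal (hypothetical) flags sketch for `main` above. The key names follow the
# `args.get(...)` lookups in the function; the values shown are assumptions and
# omitted keys fall back to their defaults:
#
#   args = AttrDict({
#       'hmc_steps': 0,
#       'train_steps': 1000,
#       'run_steps': 5000,
#       'beta_init': 1.0,
#       'beta_final': 4.0,
#       'dynamics_config': AttrDict({'eps': 0.1}),
#   })
#   main(args)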