def main() -> None:
    """Evaluate a previously trained model on the evaluation formulas.

    Loads the model named in the config, runs a single evaluation rollout
    (without gradients), logs the rollout statistics, and writes the final
    generated structure to a trajectory file.
    """
    config = get_config()

    bag_symbols = config['bag_symbols'].split(',')
    action_space = ActionSpace()
    observation_space = ObservationSpace(canvas_size=config['canvas_size'], symbols=bag_symbols)

    model = load_specific_model(model_path=config['loaded_model_name'])
    model.action_space = action_space
    model.observation_space = observation_space

    reward = InteractionReward(config['rho'])

    # Evaluation formulas default to the training formulas if not given
    if not config['eval_formulas']:
        config['eval_formulas'] = config['formulas']

    eval_formulas = parse_formulas(config['eval_formulas'])
    eval_init_formulas = parse_formulas(config['eval_formulas'])

    eval_env = MolecularEnvironment(
        reward=reward,
        observation_space=observation_space,
        action_space=action_space,
        formulas=eval_formulas,
        min_atomic_distance=config['min_atomic_distance'],
        max_h_distance=config['max_h_distance'],
        min_reward=config['min_reward'],
        initial_formula=eval_init_formulas,
        bag_refills=config['bag_refills'],
    )

    # Buffer only needs to hold a single episode; 1000 steps is ample headroom.
    eval_buffer_size = 1000
    eval_buffer = PPOBuffer(int_act_dim=model.internal_action_dim,
                            size=eval_buffer_size,
                            gamma=config['discount'],
                            lam=config['lam'])

    with torch.no_grad():
        # Fix: assigning `model.training = False` only flips the flag on the
        # top-level module; eval()/train() propagate recursively to submodules
        # (dropout, batch norm, ...), which is what is intended here.
        model.eval()
        rollout_info = rollout(model, eval_env, eval_buffer, num_episodes=1)
        model.train()

    logging.info('Evaluation rollout: ' + str(rollout_info))

    # Recover the final structure from the last observation written to the buffer
    atoms, _ = eval_env.observation_space.parse(eval_buffer.next_obs_buf[eval_buffer.ptr - 1])
    logging.info(f'Evaluated structure: {atoms}')

    # TODO(review): hard-coded, user-specific output path — should come from config
    io.write('/home/energy/s153999/evaluated_structure.traj', atoms)
class TestReward(TestCase):
    """Checks InteractionReward against pre-computed reference values."""

    RESOURCES = pkg_resources.resource_filename(__package__, RESOURCES_FOLDER)

    def setUp(self):
        # A fresh reward function for every test case.
        self.reward = InteractionReward()

    def test_calculation(self):
        # Placing the very first atom on an empty canvas gives no reward.
        reward, _ = self.reward.calculate(Atoms(), Atom('H'))
        self.assertEqual(reward, 0)

    def test_h2(self):
        # Adding a second H at distance 1 from the first yields a known reward.
        canvas = Atoms()
        canvas.append(Atom('H', position=(0, 0, 0)))
        incoming = Atom('H', position=(1, 0, 0))
        reward, _ = self.reward.calculate(canvas, incoming)
        self.assertAlmostEqual(reward, 0.1696435)

    def test_addition(self):
        # Build a linear H3 chain one atom at a time; the accumulated reward
        # over the two placements must match the reference value.
        hydrogens = [Atom('H', position=(x, 0, 0)) for x in (0, 1, 2)]
        canvas = Atoms()
        canvas.append(hydrogens[0])
        first_reward, _ = self.reward.calculate(canvas, hydrogens[1])
        canvas.append(hydrogens[1])
        second_reward, _ = self.reward.calculate(canvas, hydrogens[2])
        canvas.append(hydrogens[2])
        self.assertAlmostEqual(first_reward + second_reward, 0.2141968)
def setUp(self):
    # Create a fresh InteractionReward before each test so state cannot
    # leak between test cases.
    self.reward = InteractionReward()
def setUp(self):
    """Build the reward function and the spaces shared by all tests."""
    self.reward = InteractionReward()
    # Chemical element symbols available on the canvas.
    self.symbols = list('HCNO')
    self.action_space = ActionSpace()
    self.observation_space = ObservationSpace(canvas_size=5, symbols=self.symbols)
def setUp(self):
    """Build the reward function and the spaces shared by all tests."""
    self.reward = InteractionReward()
    # Atomic numbers available to the agent (0 acts as the empty slot).
    self.zs = [0, 1, 6, 7, 8]
    self.action_space = ActionSpace(zs=self.zs)
    self.observation_space = ObservationSpace(canvas_size=5, zs=self.zs)
def main() -> None:
    """Entry point for MPI-parallel PPO training of the molecular-design agent.

    Reads all settings from the config, builds (or restores) the actor-critic
    model, constructs the training and evaluation environments, and hands
    everything to the `ppo` training loop.
    """
    util.set_one_thread()
    # torch.set_num_threads(24)
    config = get_config()

    # Directories for logs, checkpoints, rollouts, results, and structures
    util.create_directories([
        config['log_dir'], config['model_dir'], config['data_dir'],
        config['results_dir'], config['structures_dir']
    ])

    tag = util.get_tag(config)
    util.setup_logger(config, directory=config['log_dir'], tag=tag)
    util.save_config(config, directory=config['log_dir'], tag=tag)

    # Offset the seed by the MPI rank so every worker draws a different stream
    util.set_seeds(seed=config['seed'] + mpi.get_proc_rank())
    model_handler = util.ModelIO(directory=config['model_dir'], tag=tag)

    bag_symbols = config['bag_symbols'].split(',')
    action_space = ActionSpace()
    observation_space = ObservationSpace(canvas_size=config['canvas_size'], symbols=bag_symbols)

    # Model selection: an explicitly named model takes precedence over resuming
    # a checkpoint, which in turn takes precedence over building from scratch.
    start_num_steps = 0
    if config['loaded_model_name']:
        model = load_specific_model(model_path=config['loaded_model_name'])
        model.action_space = action_space
        model.observation_space = observation_space
    else:
        if not config['load_model']:
            model = build_model(config, observation_space=observation_space, action_space=action_space)
        else:
            model, start_num_steps = model_handler.load()
            model.action_space = action_space
            model.observation_space = observation_space

    # Broadcast parameters so all MPI workers start from identical weights
    mpi.sync_params(model)

    var_counts = util.count_vars(model)
    logging.info(f'Number of parameters: {var_counts}')

    reward = InteractionReward(config['rho'])

    # Evaluation formulas default to the training formulas
    if not config['eval_formulas']:
        config['eval_formulas'] = config['formulas']

    train_formulas = parse_formulas(config['formulas'])
    eval_formulas = parse_formulas(config['eval_formulas'])
    train_init_formulas = parse_formulas(config['formulas'])
    eval_init_formulas = parse_formulas(config['eval_formulas'])
    logging.info(f'Training bags: {train_formulas}')
    logging.info(f'Evaluation bags: {eval_formulas}')

    # Number of episodes during evaluation: one per evaluation bag by default
    if not config['num_eval_episodes']:
        config['num_eval_episodes'] = len(eval_formulas)

    env = MolecularEnvironment(
        reward=reward,
        observation_space=observation_space,
        action_space=action_space,
        formulas=train_formulas,
        min_atomic_distance=config['min_atomic_distance'],
        max_h_distance=config['max_h_distance'],
        min_reward=config['min_reward'],
        initial_formula=train_init_formulas,
        bag_refills=config['bag_refills'],
    )
    eval_env = MolecularEnvironment(
        reward=reward,
        observation_space=observation_space,
        action_space=action_space,
        formulas=eval_formulas,
        min_atomic_distance=config['min_atomic_distance'],
        max_h_distance=config['max_h_distance'],
        min_reward=config['min_reward'],
        initial_formula=eval_init_formulas,
        bag_refills=config['bag_refills'],
    )

    rollout_saver = RolloutSaver(directory=config['data_dir'], tag=tag, all_ranks=config['all_ranks'])
    info_saver = InfoSaver(directory=config['results_dir'], tag=tag)
    image_saver = StructureSaver(directory=config['structures_dir'], tag=tag)

    # Idiom fix: membership test instead of chained equality comparisons
    save_rollouts = config['save_rollouts']

    ppo(
        env=env,
        eval_env=eval_env,
        ac=model,
        gamma=config['discount'],
        start_num_steps=start_num_steps,
        max_num_steps=config['max_num_steps'],
        num_steps_per_iter=config['num_steps_per_iter'],
        clip_ratio=config['clip_ratio'],
        learning_rate=config['learning_rate'],
        vf_coef=config['vf_coef'],
        entropy_coef=config['entropy_coef'],
        max_num_train_iters=config['max_num_train_iters'],
        lam=config['lam'],
        target_kl=config['target_kl'],
        gradient_clip=config['gradient_clip'],
        eval_freq=config['eval_freq'],
        model_handler=model_handler,
        save_freq=config['save_freq'],
        num_eval_episodes=config['num_eval_episodes'],
        rollout_saver=rollout_saver,
        save_train_rollout=save_rollouts in ('train', 'all'),
        save_eval_rollout=save_rollouts in ('eval', 'all'),
        info_saver=info_saver,
        structure_saver=image_saver,
    )
def main() -> None:
    """Entry point for single-process batch-PPO training.

    Builds (or restores) the actor-critic model on the configured device,
    creates vectorized training and evaluation environment containers, and
    starts the `batch_ppo` training loop.
    """
    config = get_config()

    # Directories for logs, checkpoints, rollouts, and results
    util.create_directories([config['log_dir'], config['model_dir'], config['data_dir'], config['results_dir']])

    tag = util.get_tag(config)
    util.setup_logger(config, directory=config['log_dir'], tag=tag)
    util.save_config(config, directory=config['log_dir'], tag=tag)
    util.set_seeds(seed=config['seed'])
    device = util.init_device(config['device'])

    # Translate element symbols (e.g. 'H,C,O') into atomic numbers
    zs = [ase.data.atomic_numbers[s] for s in config['symbols'].split(',')]
    action_space = ActionSpace(zs=zs)
    observation_space = ObservationSpace(canvas_size=config['canvas_size'], zs=zs)

    # Evaluation formulas default to the training formulas
    if not config['eval_formulas']:
        config['eval_formulas'] = config['formulas']

    train_formulas = util.split_formula_strings(config['formulas'])
    eval_formulas = util.split_formula_strings(config['eval_formulas'])
    logging.info(f'Training bags: {train_formulas}')
    logging.info(f'Evaluation bags: {eval_formulas}')

    model_handler = ModelIO(directory=config['model_dir'], tag=tag, keep=config['keep_models'])

    # Model selection: resume latest checkpoint > load an explicit path > build fresh
    if config['load_latest']:
        model, start_num_steps = model_handler.load_latest(device=device)
        model.action_space = action_space
        model.observation_space = observation_space
    elif config['load_model'] is not None:
        model, start_num_steps = model_handler.load(device=device, path=config['load_model'])
        model.action_space = action_space
        model.observation_space = observation_space
    else:
        model = build_model(config, observation_space=observation_space, action_space=action_space, device=device)
        start_num_steps = 0

    var_counts = util.count_vars(model)
    logging.info(f'Number of parameters: {var_counts}')

    reward = InteractionReward()

    # Number of episodes during evaluation: one per evaluation bag by default
    if not config['num_eval_episodes']:
        config['num_eval_episodes'] = len(eval_formulas)

    # num_envs parallel copies of the training environment; a single eval env
    training_envs = SimpleEnvContainer([
        MolecularEnvironment(
            reward=reward,
            observation_space=observation_space,
            action_space=action_space,
            formulas=[util.string_to_formula(f) for f in train_formulas],
            min_atomic_distance=config['min_atomic_distance'],
            max_solo_distance=config['max_solo_distance'],
            min_reward=config['min_reward'],
        ) for _ in range(config['num_envs'])
    ])
    eval_envs = SimpleEnvContainer([
        MolecularEnvironment(
            reward=reward,
            observation_space=observation_space,
            action_space=action_space,
            formulas=[util.string_to_formula(f) for f in eval_formulas],
            min_atomic_distance=config['min_atomic_distance'],
            max_solo_distance=config['max_solo_distance'],
            min_reward=config['min_reward'],
        )
    ])

    # Idiom fix: membership test instead of chained equality comparisons
    save_rollouts = config['save_rollouts']

    batch_ppo(
        envs=training_envs,
        eval_envs=eval_envs,
        ac=model,
        optimizer=util.get_optimizer(name=config['optimizer'],
                                     learning_rate=config['learning_rate'],
                                     parameters=model.parameters()),
        gamma=config['discount'],
        start_num_steps=start_num_steps,
        max_num_steps=config['max_num_steps'],
        num_steps_per_iter=config['num_steps_per_iter'],
        mini_batch_size=config['mini_batch_size'],
        clip_ratio=config['clip_ratio'],
        vf_coef=config['vf_coef'],
        entropy_coef=config['entropy_coef'],
        max_num_train_iters=config['max_num_train_iters'],
        lam=config['lam'],
        target_kl=config['target_kl'],
        gradient_clip=config['gradient_clip'],
        eval_freq=config['eval_freq'],
        model_handler=model_handler,
        save_freq=config['save_freq'],
        num_eval_episodes=config['num_eval_episodes'],
        rollout_saver=util.RolloutSaver(directory=config['data_dir'], tag=tag),
        save_train_rollout=save_rollouts in ('train', 'all'),
        save_eval_rollout=save_rollouts in ('eval', 'all'),
        info_saver=util.InfoSaver(directory=config['results_dir'], tag=tag),
        device=device,
    )