emb_graph = tf.tile(emb_graph, [1, tf.shape(emb_node)[1], 1]) emb_node = tf.concat([emb_node, emb_graph], axis=2) return emb_node #### debug if __name__ == "__main__": adj_np = np.ones((5, 3, 4, 4)) adj = tf.placeholder(shape=(5, 3, 4, 4), dtype=tf.float32) node_feature_np = np.ones((5, 1, 4, 3)) node_feature = tf.placeholder(shape=(5, 1, 4, 3), dtype=tf.float32) ob_space = {} atom_type = 5 ob_space['adj'] = gym.Space(shape=[3, 5, 5]) ob_space['node'] = gym.Space(shape=[1, 5, atom_type]) ac_space = gym.spaces.MultiDiscrete([10, 10, 3]) policy = GCNPolicy(name='policy', ob_space=ob_space, ac_space=ac_space) stochastic = True env = gym.make('molecule-v0') # in gym format env.init() ob = env.reset() # ob['adj'] = np.repeat(ob['adj'][None],2,axis=0) # ob['node'] = np.repeat(ob['node'][None],2,axis=0) print('adj', ob['adj'].shape) print('node', ob['node'].shape) with tf.Session() as sess:
def __init__(self,
             dt=0.01,
             nt=1000,
             seed=0,
             task='no_collision',
             map_file=None,
             simulator_conf=None,
             healthy_reward=1.0,
             **kwargs):
    """Build the quadrotor environment for one of the supported tasks.

    Args:
        dt: stored as ``self.dt``; also forwarded to the simulator when
            the velocity-control task is defined (presumably the
            simulation time step -- confirm against ``QuadrotorSim``).
        nt: stored as ``self.nt``; also forwarded to the simulator when
            the velocity-control task is defined (presumably the number
            of steps per episode -- confirm against ``QuadrotorSim``).
        seed: only used to define the velocity-control task.
        task: one of ``'velocity_control'``, ``'no_collision'`` or
            ``'hovering_control'``.
        map_file: handed to ``Quadrotor.load_map`` for the two map-based
            tasks; unused for ``'velocity_control'``.
        simulator_conf: path of the simulator configuration file. When
            ``None``, ``config.json`` next to this module is used.
        healthy_reward: stored as ``self.healthy_reward``.
        **kwargs: accepted and ignored by this method.

    Raises:
        AssertionError: if ``task`` is not supported, if the simulator
            configuration file does not exist, or if the loaded map does
            not contain exactly one cell equal to ``-1``.
    """
    # TODO: other possible tasks: precision_landing
    supported_tasks = ['velocity_control', 'no_collision', 'hovering_control']
    assert task in supported_tasks, 'Invalid task setting'

    if simulator_conf is None:
        module_dir = os.path.dirname(__file__)
        simulator_conf = os.path.join(module_dir, 'config.json')
    assert os.path.exists(simulator_conf), \
        'Simulator config file does not exist'

    self.dt = dt
    self.nt = nt
    self.ct = 0
    self.task = task
    self.healthy_reward = healthy_reward

    # The simulator owns the configuration; it supplies the valid range
    # and the per-channel action bounds.
    self.simulator = QuadrotorSim()
    cfg_dict = self.simulator.get_config(simulator_conf)
    self.valid_range = cfg_dict['range']
    action_low = np.array([cfg_dict['action_space_low']] * 4, dtype='float32')
    action_high = np.array(
        [cfg_dict['action_space_high']] * 4, dtype='float32')
    self.action_space = gym.spaces.Box(
        low=action_low, high=action_high, shape=[4])

    # Names of the state entries that make up an observation.
    self.body_velocity_keys = ['b_v_x', 'b_v_y', 'b_v_z']
    self.body_position_keys = ['b_x', 'b_y', 'b_z']
    self.accelerator_keys = ['acc_x', 'acc_y', 'acc_z']
    self.gyroscope_keys = ['gyro_x', 'gyro_y', 'gyro_z']
    self.flight_pose_keys = ['pitch', 'roll', 'yaw']
    self.barometer_keys = ['z']
    self.task_velocity_control_keys = [
        'next_target_g_v_x', 'next_target_g_v_y', 'next_target_g_v_z'
    ]

    sensor_groups = (
        self.body_velocity_keys,
        self.body_position_keys,
        self.accelerator_keys,
        self.gyroscope_keys,
        self.flight_pose_keys,
        self.barometer_keys,
    )
    obs_dim = sum(len(group) for group in sensor_groups)
    if self.task == 'velocity_control':
        # The velocity task additionally observes the next target velocity.
        obs_dim += len(self.task_velocity_control_keys)
    self.observation_space = gym.Space(shape=[obs_dim], dtype='float32')

    self.state = {}
    self.viewer = None
    self.x_offset = 0
    self.y_offset = 0
    self.z_offset = 0
    self.pos_0 = np.zeros(3, dtype=np.float32)

    if self.task == 'velocity_control':
        self.velocity_targets = \
            self.simulator.define_velocity_control_task(dt, nt, seed)
    elif self.task in ['no_collision', 'hovering_control']:
        self.map_matrix = Quadrotor.load_map(map_file)

        # Only for single quadrotor, also mark its start position:
        # the single cell holding -1 gives the start offsets and is then
        # reset to 0 in the map.
        start_rows, start_cols = np.where(self.map_matrix == -1)
        assert len(start_rows) == 1
        self.y_offset = start_rows[0]
        self.x_offset = start_cols[0]
        self.z_offset = 5.  # TODO: setup a better init height
        self.map_matrix[self.y_offset, self.x_offset] = 0
def __init__(self,
             dataset_name,
             logp_ratio=1,
             qed_ratio=1,
             sa_ratio=1,
             reward_step_total=1,
             is_normalize=0,
             reward_type='gan',
             reward_target=0.5,
             has_scaffold=False,
             has_feature=False,
             is_conditional=False,
             conditional='low',
             max_action=128,
             min_action=20,
             force_final=False):
    """Configure the molecule environment for a named dataset.

    Args:
        dataset_name: key into ``all_datasets``; must also be one of the
            names with a known atom vocabulary (``'gdb'`` or ``'zinc'``).
        logp_ratio, qed_ratio, sa_ratio, reward_step_total: stored on the
            instance unchanged; they are not otherwise used here.
        is_normalize: stored as ``bool(is_normalize)``.
        reward_type, reward_target, force_final: stored on the instance
            unchanged.
        has_scaffold: when true, scaffold data is loaded through
            ``load_scaffold()`` and ``max_scaffold`` is set to 6.
        has_feature: when true, the per-feature value tables are created
            and the node feature width ``d_n`` grows by 6.
        is_conditional: when true, the starting molecule is built from a
            randomly sampled entry of the conditional list instead of
            being empty.
        conditional: forwarded to ``load_conditional``; the result is
            stored as ``conditional_list``.
        max_action, min_action: stored on the instance; ``min_action``
            also enlarges ``max_atom`` for conditional ``'zinc'`` runs.

    Raises:
        ValueError: if ``dataset_name`` is not registered in
            ``all_datasets``, or is registered but has no atom vocabulary
            defined in this method.
    """
    self.dataset = all_datasets.get(dataset_name)
    if self.dataset is None:
        raise ValueError("dataset \"{}\" not found in [{}]".format(
            dataset_name, ", ".join(all_datasets.keys())))

    # Resolve the per-dataset atom vocabulary and base molecule size up
    # front, so an unsupported name fails with a clear error before any
    # conditional data is loaded (it used to surface later as an
    # UnboundLocalError on ``possible_atoms``).
    if dataset_name == 'gdb':
        possible_atoms = ['C', 'N', 'O', 'S', 'Cl']  # gdb 13
        base_atom_count = 13
    elif dataset_name == 'zinc':
        possible_atoms = ['C', 'N', 'O', 'S', 'P', 'F', 'I', 'Cl',
                          'Br']  # ZINC
        base_atom_count = 38
    else:
        raise ValueError(
            "dataset \"{}\" has no atom vocabulary defined; "
            "expected 'gdb' or 'zinc'".format(dataset_name))

    self.is_normalize = bool(is_normalize)
    self.is_conditional = is_conditional
    self.has_feature = has_feature
    self.reward_type = reward_type
    self.reward_target = reward_target
    self.force_final = force_final

    self.conditional_list = load_conditional(conditional)
    if self.is_conditional:
        # Start from one randomly chosen conditional entry; its first
        # element is parsed as a SMILES string.
        self.conditional = random.sample(self.conditional_list, 1)[0]
        self.mol = Chem.RWMol(Chem.MolFromSmiles(self.conditional[0]))
        # Only the kekulize sanitization step is requested.
        Chem.SanitizeMol(
            self.mol, sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
    else:
        self.mol = Chem.RWMol()
    self.smile_list = []

    if self.has_feature:
        self.possible_formal_charge = np.array([-1, 0, 1])
        self.possible_implicit_valence = np.array([-1, 0, 1, 2, 3, 4])
        self.possible_ring_atom = np.array([True, False])
        self.possible_degree = np.array([0, 1, 2, 3, 4, 5, 6, 7])
        self.possible_hybridization = np.array([
            Chem.rdchem.HybridizationType.SP,
            Chem.rdchem.HybridizationType.SP2,
            Chem.rdchem.HybridizationType.SP3,
            Chem.rdchem.HybridizationType.SP3D,
            Chem.rdchem.HybridizationType.SP3D2
        ], dtype=object)
    possible_bonds = [
        Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE,
        Chem.rdchem.BondType.TRIPLE
    ]  # , Chem.rdchem.BondType.AROMATIC
    self.atom_type_num = len(possible_atoms)
    self.possible_atom_types = np.array(possible_atoms)
    self.possible_bond_types = np.array(possible_bonds, dtype=object)

    if self.has_feature:
        # A wider layout (atom type + formal charge + implicit valence +
        # ring membership + degree + hybridization) is intentionally not
        # used; only 6 ring-feature columns are appended.
        self.d_n = len(
            self.possible_atom_types) + 6  # 6 is the ring feature
    else:
        self.d_n = len(self.possible_atom_types)

    self.max_action = max_action
    self.min_action = min_action

    self.max_atom = base_atom_count + len(possible_atoms)
    if dataset_name == 'zinc' and self.is_conditional:
        # Only conditional ZINC runs reserve room for ``min_action``
        # additional atoms; every other configuration does not.
        self.max_atom += self.min_action

    self.logp_ratio = logp_ratio
    self.qed_ratio = qed_ratio
    self.sa_ratio = sa_ratio
    self.reward_step_total = reward_step_total
    self.action_space = gym.spaces.MultiDiscrete(
        [self.max_atom, self.max_atom, 3, 2])
    self.observation_space = {}
    self.observation_space['adj'] = gym.Space(
        shape=[len(possible_bonds), self.max_atom, self.max_atom])
    self.observation_space['node'] = gym.Space(
        shape=[1, self.max_atom, self.d_n])

    self.counter = 0

    # load scaffold data if necessary
    self.has_scaffold = has_scaffold
    if has_scaffold:
        self.scaffold = load_scaffold()
        self.max_scaffold = 6

    self.level = 0  # for curriculum learning, level starts with 0, and increase afterwards
def init(self,
         reward_function,
         n_iterations,
         max_iterations,
         max_molecule_size=38,
         possible_atoms=None,
         terminate_on_done=False,
         expected_reward=0.5,
         target_reward=0.5,
         molecule_rollback=False,
         reward_func_sees_all=True):
    """
    Second-stage constructor, kept separate from ``__init__`` because
    ``gym.make()`` does not accept extra constructor parameters.

    Parameters
    ----------
    reward_function : function
        Callable that returns the reward as a float
    n_iterations : int
        Number of iterations between interim rewards
    max_iterations : int
        User-chosen number of iterations after which the environment stops
    max_molecule_size : int
        Upper bound on the number of atoms in the molecule
    possible_atoms : list[str]
        Element symbols available to the environment; defaults to
        ``['C', 'N', 'O', 'S', 'Cl']`` when ``None``
    terminate_on_done : boolean
        Whether to terminate once the ``step`` method reports done
    expected_reward : float
        Value handed on to the RL method chosen by the user
    target_reward : float
        Target reward for the RL method chosen by the user
    molecule_rollback : boolean
        When true, changes to the molecule are rolled back after a
        negative reward
    reward_func_sees_all : boolean
        When false, the reward function only receives the RWMol object
        instead of the whole environment, which greatly reduces the
        amount of information passed to it
    """
    atom_symbols = (['C', 'N', 'O', 'S', 'Cl']
                    if possible_atoms is None else possible_atoms)
    bond_kinds = [
        Chem.rdchem.BondType.SINGLE,
        Chem.rdchem.BondType.DOUBLE,
        Chem.rdchem.BondType.TRIPLE,
    ]

    self.possible_atoms = atom_symbols
    self.possible_bonds = bond_kinds
    self.max_molecule_size = max_molecule_size
    self.n_iterations = n_iterations
    self.max_iterations = max_iterations
    self.interim_reward = 0
    self.cumulative_reward = 0
    self.mol = Chem.RWMol()

    # d_n entries: the atom symbols, as strings.
    self.possible_atom_types = np.array(self.possible_atoms)
    # d_e entries: the rdkit.Chem.rdchem.BondType objects.
    self.possible_bond_types = np.array(self.possible_bonds, dtype=object)
    self.current_atom_index = None
    self.reward_function = reward_function

    # Number of iterations performed so far.
    self.step_counter = 0

    atom_type_count = len(self.possible_atom_types)
    bond_type_count = len(self.possible_bonds)
    size = self.max_molecule_size
    self.action_space = gym.spaces.MultiDiscrete(
        [atom_type_count, size, size, bond_type_count])

    # Observation layout (k = maximum atoms in the molecule,
    # d_e = possible bond types, d_n = possible atom types):
    #   adj  -- adjacency matrix, k x k
    #   edge -- edge attribute matrix, k x k x d_e
    #   node -- node attribute matrix, k x d_n
    spaces = {}
    spaces['adj'] = gym.Space(
        shape=[1, self.max_molecule_size, self.max_molecule_size])
    spaces['edge'] = gym.Space(shape=[
        len(self.possible_bonds), self.max_molecule_size,
        self.max_molecule_size
    ])
    spaces['node'] = gym.Space(
        shape=[1, self.max_molecule_size, len(self.possible_atom_types)])
    self.observation_space = spaces

    self.pymol_window_flag = False
    self.terminate_on_done = terminate_on_done
    self.molecule_rollback = molecule_rollback
    self.reward_func_sees_all = reward_func_sees_all
    self.expected_reward = expected_reward
    self.target_reward = target_reward

    # Create the rendering directory; an already existing path is fine,
    # any other OS error is propagated.
    try:
        os.makedirs("./pymol_renderings")
    except OSError as err:
        already_exists = err.errno == errno.EEXIST
        if not already_exists:
            raise
def init(self,
         data_type='zinc',
         logp_ratio=1,
         qed_ratio=1,
         sa_ratio=1,
         reward_step_total=1,
         is_normalize=0,
         reward_type='gan',
         reward_target=0.5,
         has_scaffold=False,
         has_feature=False,
         is_conditional=False,
         conditional='low',
         max_action=128,
         min_action=20,
         force_final=False):
    '''
    Own init function, since gym does not support passing arguments
    through its constructor.

    Args:
        data_type: dataset to use; only ``'zinc'`` is supported.
        logp_ratio, qed_ratio, sa_ratio, reward_step_total: stored on the
            instance unchanged; they are not otherwise used here.
        is_normalize: stored as ``bool(is_normalize)``.
        reward_type, reward_target, force_final: stored on the instance
            unchanged.
        has_scaffold: when true, scaffold data is loaded through
            ``load_scaffold()`` and ``max_scaffold`` is set to 6.
        has_feature: when true, the per-feature value tables are created
            and the node feature width ``d_n`` grows by 6.
        is_conditional: when true, the starting molecule is built from a
            randomly sampled entry of the conditional list and
            ``max_atom`` is enlarged by ``min_action``.
        conditional: forwarded to ``load_conditional``; the result is
            stored as ``conditional_list``.
        max_action, min_action: stored on the instance.

    Raises:
        ValueError: if ``data_type`` is not ``'zinc'``.
    '''
    # Fail fast with a clear message. Any other value used to leave
    # ``possible_atoms`` unassigned and crash later with an
    # UnboundLocalError after the instance was partially initialised.
    if data_type != 'zinc':
        raise ValueError(
            "unsupported data_type \"{}\"; only 'zinc' is supported".format(
                data_type))

    self.is_normalize = bool(is_normalize)
    self.is_conditional = is_conditional
    self.has_feature = has_feature
    self.reward_type = reward_type
    self.reward_target = reward_target
    self.force_final = force_final

    self.conditional_list = load_conditional(conditional)
    if self.is_conditional:
        # Start from one randomly chosen conditional entry; its first
        # element is parsed as a SMILES string.
        self.conditional = random.sample(self.conditional_list, 1)[0]
        self.mol = Chem.RWMol(Chem.MolFromSmiles(self.conditional[0]))
        # Only the kekulize sanitization step is requested.
        Chem.SanitizeMol(
            self.mol, sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
    else:
        self.mol = Chem.RWMol()
    self.smile_list = []

    possible_atoms = ['C', 'N', 'O', 'S', 'P', 'F', 'I', 'Cl',
                      'Br']  # ZINC
    if self.has_feature:
        self.possible_formal_charge = np.array([-1, 0, 1])
        self.possible_implicit_valence = np.array([-1, 0, 1, 2, 3, 4])
        self.possible_ring_atom = np.array([True, False])
        self.possible_degree = np.array([0, 1, 2, 3, 4, 5, 6, 7])
        self.possible_hybridization = np.array([
            Chem.rdchem.HybridizationType.SP,
            Chem.rdchem.HybridizationType.SP2,
            Chem.rdchem.HybridizationType.SP3,
            Chem.rdchem.HybridizationType.SP3D,
            Chem.rdchem.HybridizationType.SP3D2
        ], dtype=object)
    possible_bonds = [
        Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE,
        Chem.rdchem.BondType.TRIPLE
    ]  # , Chem.rdchem.BondType.AROMATIC
    self.atom_type_num = len(possible_atoms)
    self.possible_atom_types = np.array(possible_atoms)
    self.possible_bond_types = np.array(possible_bonds, dtype=object)

    if self.has_feature:
        # A wider layout (atom type + formal charge + implicit valence +
        # ring membership + degree + hybridization) is intentionally not
        # used; only 6 ring-feature columns are appended.
        self.d_n = len(
            self.possible_atom_types) + 6  # 6 is the ring feature
    else:
        self.d_n = len(self.possible_atom_types)

    self.max_action = max_action
    self.min_action = min_action

    self.max_atom = 38 + len(possible_atoms)  # ZINC
    if self.is_conditional:
        # Only conditional runs reserve room for ``min_action``
        # additional atoms.
        self.max_atom += self.min_action

    self.logp_ratio = logp_ratio
    self.qed_ratio = qed_ratio
    self.sa_ratio = sa_ratio
    self.reward_step_total = reward_step_total
    self.action_space = gym.spaces.MultiDiscrete(
        [self.max_atom, self.max_atom, 3, 2])
    self.observation_space = {}
    self.observation_space['adj'] = gym.Space(
        shape=[len(possible_bonds), self.max_atom, self.max_atom])
    self.observation_space['node'] = gym.Space(
        shape=[1, self.max_atom, self.d_n])

    self.counter = 0

    ## load expert data
    # The dataset file is looked up in ``dataset/`` under the parent of
    # this module's directory.
    cwd = os.path.dirname(__file__)
    path = os.path.join(
        os.path.dirname(cwd), 'dataset',
        '250k_rndm_zinc_drugs_clean_sorted.smi')  # ZINC
    self.dataset = gdb_dataset(path)

    ## load scaffold data if necessary
    self.has_scaffold = has_scaffold
    if has_scaffold:
        self.scaffold = load_scaffold()
        self.max_scaffold = 6

    self.level = 0  # for curriculum learning, level starts with 0, and increase afterwards