def parses(s):
    """Check whether a SMILES string can be featurized into an MPNN-ready dict"""
    # Canonicalize the SMILES by round-tripping it through the networkx representation
    s = convert_nx_to_smiles(convert_smiles_to_nx(s))
    try:
        # atom_types and bond_types are the lookup tables defined in the enclosing scope
        convert_nx_to_dict(convert_smiles_to_nx(s), atom_types, bond_types)
        return True
    except ValueError:
        return False
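A minimal usage sketch for the helper above, assuming `parses` and the conversion utilities are in scope; the candidate SMILES strings are purely illustrative:

# Keep only the SMILES strings that the featurization pipeline accepts
candidates = ['C', 'CCO', 'c1ccccc1']
usable = [smiles for smiles in candidates if parses(smiles)]
print(usable)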
Example #2
    def update_actions(self, new_state: nx.Graph, allowed_space: Space):
        """Generate the available actions for a new state

        Uses the new state to redefine the action space
        for the environment.

        Args:
            new_state (nx.Graph): Molecule used to define the action space
            allowed_space (Space): Space of possible observations
        """

        # Store the new state
        self._state = new_state

        # Compute the possible actions, which we describe by the new molecule they would form
        valid_actions = get_valid_actions(
            convert_nx_to_smiles(new_state),
            atom_types=self.atom_types,
            allow_removal=self.allow_removal,
            allow_no_modification=self.allow_no_modification,
            allowed_ring_sizes=self.allowed_ring_sizes,
            allow_bonds_between_rings=self.allow_bonds_between_rings,
            max_molecule_size=self.max_molecule_size)

        # Get only those actions which are in the desired space
        self._valid_actions = [
            convert_smiles_to_nx(x) for x in valid_actions
            if x in allowed_space
        ]
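For context, a minimal sketch of calling the underlying `get_valid_actions` helper directly, assuming it accepts the same keyword arguments used above; the concrete argument values are assumptions for illustration:

valid_smiles = get_valid_actions(
    'CC',                            # start from ethane
    atom_types={'C', 'N', 'O'},      # assumed element lookup table
    allow_removal=True,
    allow_no_modification=True,
    allowed_ring_sizes=(5, 6),
    allow_bonds_between_rings=False,
    max_molecule_size=10)
# Each entry is the SMILES string of a molecule one modification away from 'CC'
print(len(valid_smiles))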
Example #3
def run_experiment(episodes, n_steps, update_q_every, log_file, rewards: Dict[str, RewardFunction]):
    """Perform the RL experiment

    Args:
        episodes (int): Number of episodes to run
        n_steps (int): Maximum number of steps per episode
        update_q_every (int): After how many episodes to update the target Q network
        log_file (DictWriter): Tool used to write the output log
        rewards (Dict[str, RewardFunction]): Reward functions evaluated on each new state and written to the log
    """
    best_reward = -1 * inf

    for e in tqdm(range(episodes), desc='RL Episodes', leave=True, disable=False):
        current_state = env.reset()
        for s in tqdm(range(n_steps), desc='\t RL Steps', disable=True):
            # Get action based on current state
            action, q, was_random = agent.action()

            # Take the action in the environment
            new_state, reward, done, _ = env.step(action)

            # Check if it's the last step and flag as done
            if s == n_steps - 1:
                logger.debug('Last step ... done')
                done = True

            # Save outcome
            agent.remember(current_state, action, reward,
                           new_state, agent.env.action_space.get_possible_actions(), done)

            # Train model
            loss = agent.train()

            # Compute all of the rewards
            state_rewards = dict((name, r(new_state)) for name, r in rewards.items())

            # Write to output log
            log_file.writerow({
                'episode': e, 'step': s, 'smiles': convert_nx_to_smiles(env.state),
                'loss': loss, 'reward': reward, 'epsilon': agent.epsilon, 'q': q,
                'random': was_random,
                **state_rewards
            })

            # Update state
            current_state = new_state

            if reward > best_reward:
                best_reward = reward
                logger.info("Best reward: %s" % best_reward)

            if done:
                break

        # Update the Q network after certain numbers of episodes and adjust epsilon
        if e > 0 and e % update_q_every == 0:
            agent.update_target_q_network()
        agent.epsilon_adj()
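A hedged sketch of how this loop might be driven, assuming the module-level `env` and `agent` objects exist and that `logp_reward` stands in for a real RewardFunction instance; the CSV field names mirror the writerow call above:

import csv

reward_fns = {'logP': logp_reward}   # assumed reward function instances
with open('rl_log.csv', 'w', newline='') as fp:
    fields = ['episode', 'step', 'smiles', 'loss', 'reward',
              'epsilon', 'q', 'random'] + list(reward_fns)
    writer = csv.DictWriter(fp, fieldnames=fields)
    writer.writeheader()
    run_experiment(episodes=100, n_steps=32, update_q_every=10,
                   log_file=writer, rewards=reward_fns)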
Example #4
def convert_nx_to_dict(graph: nx.Graph, atom_types: List[int],
                       bond_types: List[str]) -> dict:
    """Convert networkx representation of a molecule to an MPNN-ready dict

    Args:
        graph: Molecule to be converted
        atom_types: Lookup table of observed atom types
        bond_types: Lookup table of observed bond types
    Returns:
        (dict) Molecule as a dict
    """

    # Get the atom types
    atom_type = [n['atomic_num'] for _, n in graph.nodes(data=True)]
    atom_type_id = list(map(atom_types.index, atom_type))

    # Get the bond types, making the data
    connectivity = []
    edge_type = []
    for a, b, d in graph.edges(data=True):
        connectivity.append([a, b])
        connectivity.append([b, a])
        edge_type.append(str(d['bond_type']))
        edge_type.append(str(d['bond_type']))
    edge_type_id = list(map(bond_types.index, edge_type))

    # Sort connectivity array by the first column
    #  This is needed for the MPNN code to efficiently group messages for
    #  each node when performing the message passing step
    connectivity = np.array(connectivity)
    if connectivity.size > 0:
        # Skip a special case of a molecule w/o bonds
        inds = np.lexsort((connectivity[:, 1], connectivity[:, 0]))
        connectivity = connectivity[inds, :]

        # Tensorflow's "segment_sum" will cause problems if the last atom
        #  is not bonded, because the output array is sized by the largest
        #  segment index and the trailing atoms would be dropped
        if connectivity.max() != len(atom_type) - 1:
            smiles = convert_nx_to_smiles(graph)
            raise ValueError(f"Problem with unconnected atoms for {smiles}")
    else:
        connectivity = np.zeros((0, 2))

    return {
        'n_atom': len(atom_type),
        'n_bond': len(edge_type),
        'atom': atom_type_id,
        'bond': edge_type_id,
        'connectivity': connectivity
    }
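A minimal usage sketch, assuming `convert_smiles_to_nx` from the earlier examples is available, that the 'atomic_num' node attributes hold atomic numbers, and that bond types stringify to names such as 'SINGLE'; the lookup tables below are assumptions:

atom_types = [1, 6, 7, 8]                                 # H, C, N, O (assumed)
bond_types = ['SINGLE', 'DOUBLE', 'TRIPLE', 'AROMATIC']   # assumed bond labels

graph = convert_smiles_to_nx('CCO')                       # ethanol
mpnn_input = convert_nx_to_dict(graph, atom_types, bond_types)
print(mpnn_input['n_atom'], mpnn_input['n_bond'])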
Example #5
def generate_molecules(
        agent: DQNFinalState,
        episodes: int = 10,
        n_steps: int = 32,
        update_q_every: int = 10) -> Tuple[Set[str], DQNFinalState]:
    """Perform the RL experiment

    Args:
        agent (DQNFinalState): Molecular design agent
        episodes (int): Number of episodes to run
        n_steps (int): Maximum number of steps per episode
        update_q_every (int): After how many episodes to update the target Q network
    Returns:
        (Set[str], DQNFinalState): Set of SMILES strings that were created, and the updated agent
    """

    # Prepare the output
    output = set()

    # Keep track of the smiles strings
    for e in range(episodes):
        current_state = agent.env.reset()
        logger.info(f'Starting episode {e+1}/{episodes}')
        for s in range(n_steps):
            # Get action based on current state
            action, _, _ = agent.action()

            # Take the action in the environment
            new_state, reward, done, _ = agent.env.step(action)

            # Check if it's the last step and flag as done
            if s == n_steps - 1:
                logger.debug('Last step ... done')
                done = True

            # Add the state to the output
            output.add(agent.env.state)

            # Save outcome
            agent.remember(current_state, action, reward, new_state,
                           agent.env.action_space.get_possible_actions(), done)

            # Train model
            agent.train()

            # Update state
            current_state = new_state

            if done:
                break

        # Update the Q network after certain numbers of episodes and adjust epsilon
        if e > 0 and e % update_q_every == 0:
            agent.update_target_q_network()
        agent.epsilon_adj()

    # Clear out the memory: Too large to send back to client
    agent.memory.clear()

    # Convert the outputs back to SMILES strings
    output = set(convert_nx_to_smiles(x) for x in output)
    return output, agent
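A hedged sketch of driving the generation loop, assuming a `DQNFinalState` agent has already been constructed around a molecule-design environment (its constructor is not part of the snippet):

# agent = DQNFinalState(...)   # construction depends on the surrounding package
molecules, trained_agent = generate_molecules(agent, episodes=20, n_steps=32,
                                              update_q_every=10)
for smiles in sorted(molecules):
    print(smiles)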
Example #6
    def _call(self, graph: nx.Graph) -> float:
        # An undefined molecule scores zero
        if graph is None:
            return 0
        # Score the molecule by passing its SMILES string to the underlying model
        return self.model.predict([convert_nx_to_smiles(graph)])[0]