def get_max_n_atoms(smi_file):
    """
    Finds the atom count of the largest molecule in an input SMILES file.

    Args:
      smi_file (str) : Full path/filename to SMILES file.

    Returns:
      int : Largest value reported by `GetNumAtoms()` over all loaded
        molecules, or 0 if no molecules are loaded.
    """
    atom_counts = (
        mol.GetNumAtoms() for mol in load_molecules(path=smi_file)
    )

    # `default=0` plays the role of a running maximum initialised to zero
    return max(atom_counts, default=0)
def get_formal_charges(smi_file):
    """
    Determines the formal charges present in an input SMILES file.

    Args:
      smi_file (str) : Full path/filename to SMILES file.

    Returns:
      list : Distinct formal charges found on the atoms of the loaded
        molecules, sorted in ascending order. Empty if no atoms are found.
    """
    molecules = load_molecules(path=smi_file)

    # deduplicate while iterating, so memory scales with the number of
    # distinct charges rather than with the total number of atoms in the file
    unique_charges = {
        atom.GetFormalCharge()
        for mol in molecules
        for atom in mol.GetAtoms()
    }

    return sorted(unique_charges)
def get_atom_types(smi_file):
    """
    Determines the atom types present in an input SMILES file.

    Args:
      smi_file (str) : Full path/filename to SMILES file.

    Returns:
      list : Element symbols of the distinct atom types found in the loaded
        molecules, ordered by ascending atomic number. Empty if no atoms are
        found.
    """
    molecules = load_molecules(path=smi_file)

    # deduplicate while iterating, so memory scales with the number of
    # distinct elements rather than with the total number of atoms in the file
    unique_atomic_nums = {
        atom.GetAtomicNum()
        for mol in molecules
        for atom in mol.GetAtoms()
    }

    # return the symbols (rather than the atomic numbers), for convenience
    return [
        rdkit.Chem.Atom(atomic_num).GetSymbol()
        for atomic_num in sorted(unique_atomic_nums)
    ]