def create_augmented_layers(mol):
    """
    Build the augmented-InChI layers (u-layer and p-layer) for `mol`.

    The indices in the string refer to the atom indices in the molecule,
    according to the atom order obtained by sorting the atoms using the InChI
    canonicalization algorithm.

    First a deep copy is created of the original molecule and hydrogen atoms
    are removed from the copy. Next, the copy is converted into an InChI
    string, and the auxiliary information of the inchification procedure is
    retrieved.

    The N-layer is parsed and used to sort the atoms of the copy according to
    the order in the InChI. In case the molecule contains atoms that cannot be
    distinguished with the InChI algorithm ('equivalent atoms'), the position
    of the unpaired electrons is changed so as to ensure the atoms with the
    lowest indices are used to compose the string (presumably handled inside
    `create_U_layer` / `create_P_layer`; that logic is not in this function).

    Returns a 2-tuple `(ulayer, player)`:

    * `(None, None)` when the molecule has no radicals and no unexpected
      lone pairs;
    * `(inchiutil.U_LAYER_PREFIX + '1', None)` when the formula is `'H'`;
    * otherwise the results of `create_U_layer` and `create_P_layer` on the
      hydrogen-stripped, InChI-ordered copy.

    The input `mol` itself is not reordered; all edits happen on the deep copy.
    """
    if mol.getRadicalCount() == 0 and not has_unexpected_lone_pairs(mol):
        return None, None

    if mol.getFormula() == 'H':
        return inchiutil.U_LAYER_PREFIX + '1', None

    molcopy = mol.copy(deep=True)

    # Snapshot the hydrogens into a real list *before* removing anything.
    # A lazy `filter` over `molcopy.atoms` (Python 3) would be consumed while
    # `removeAtom` is presumably shrinking that same list, which can skip atoms.
    hydrogens = [at for at in molcopy.atoms if at.number == 1]
    for h in hydrogens:
        molcopy.removeAtom(h)

    rdkitmol = toRDKitMol(molcopy)
    _, auxinfo = Chem.MolToInchiAndAuxInfo(
        rdkitmol, options='-SNon')  # suppress stereo warnings

    # extract the atom numbers from N-layer of auxiliary info:
    atom_indices = inchiutil.parse_N_layer(auxinfo)
    # For each atom (1-based number i + 1), find its position in the N-layer.
    atom_indices = [
        atom_indices.index(i + 1) for i, _ in enumerate(molcopy.atoms)
    ]

    # sort the atoms based on the order of the atom indices; the key only
    # looks at the index so atoms themselves are never compared
    molcopy.atoms = [
        atom
        for _, atom in sorted(zip(atom_indices, molcopy.atoms),
                              key=lambda pair: pair[0])
    ]

    ulayer = create_U_layer(molcopy, auxinfo)
    player = create_P_layer(molcopy, auxinfo)

    return ulayer, player
def create_augmented_layers(mol):
    """
    Build the augmented-InChI layers (u-layer and p-layer) for `mol`.

    The indices in the string refer to the atom indices in the molecule,
    according to the atom order obtained by sorting the atoms using the InChI
    canonicalization algorithm.

    First a deep copy is created of the original molecule and hydrogen atoms
    are removed from the copy. Next, the copy is converted into an InChI
    string, and the auxiliary information of the inchification procedure is
    retrieved.

    The N-layer is parsed and used to sort the atoms of the copy according to
    the order in the InChI. In case the molecule contains atoms that cannot be
    distinguished with the InChI algorithm ('equivalent atoms'), the position
    of the unpaired electrons is changed so as to ensure the atoms with the
    lowest indices are used to compose the string (presumably handled inside
    `create_U_layer` / `create_P_layer`; that logic is not in this function).

    Returns a 2-tuple `(ulayer, player)`:

    * `(None, None)` when the molecule has no radicals and no unexpected
      lone pairs;
    * `(inchiutil.U_LAYER_PREFIX + '1', None)` when the formula is `'H'`;
    * otherwise the results of `create_U_layer` and `create_P_layer` on the
      hydrogen-stripped, InChI-ordered copy.

    The input `mol` itself is not reordered; all edits happen on the deep copy.
    """
    if mol.getRadicalCount() == 0 and not has_unexpected_lone_pairs(mol):
        return None, None

    if mol.getFormula() == 'H':
        return inchiutil.U_LAYER_PREFIX + '1', None

    molcopy = mol.copy(deep=True)

    # Collect the hydrogens eagerly, then remove them in a separate pass.
    # Iterating a lazy `filter` (Python 3) over `molcopy.atoms` while
    # `removeAtom` presumably mutates that list could silently skip hydrogens.
    hydrogens = [at for at in molcopy.atoms if at.number == 1]
    for h in hydrogens:
        molcopy.removeAtom(h)

    rdkitmol = toRDKitMol(molcopy)
    _, auxinfo = Chem.MolToInchiAndAuxInfo(
        rdkitmol, options='-SNon')  # suppress stereo warnings

    # extract the atom numbers from N-layer of auxiliary info:
    atom_indices = inchiutil.parse_N_layer(auxinfo)
    # For each atom (1-based number i + 1), find its position in the N-layer.
    atom_indices = [
        atom_indices.index(i + 1) for i, _ in enumerate(molcopy.atoms)
    ]

    # sort the atoms based on the order of the atom indices; the key only
    # looks at the index so atoms themselves are never compared
    molcopy.atoms = [
        atom
        for _, atom in sorted(zip(atom_indices, molcopy.atoms),
                              key=lambda pair: pair[0])
    ]

    ulayer = create_U_layer(molcopy, auxinfo)
    player = create_P_layer(molcopy, auxinfo)

    return ulayer, player