def test_from_SMILES_like_string1(self):
    """
    Parse the SMILES-like string 'C' and check that the resulting fragment
    is isomorphic to a hand-built CH4 fragment with atom types assigned.
    """
    # fragment produced by the parser under test
    # (the atom types are calculated by the parser as well)
    smiles_like = 'C'
    fragment = afm.fragment.Fragment().from_SMILES_like_string(smiles_like)

    # reference fragment assembled by hand: one carbon and four hydrogens
    carbon = Atom(element=getElement('C'), radicalElectrons=0, charge=0, lonePairs=0)
    hydrogens = [
        Atom(element=getElement('H'), radicalElectrons=0, charge=0, lonePairs=0)
        for _ in range(4)
    ]

    carbon.atomType = atomTypes['Cs']
    for hydrogen in hydrogens:
        hydrogen.atomType = atomTypes['H']

    single_bonds = [Bond(carbon, hydrogen, 1) for hydrogen in hydrogens]

    expected_fragment = afm.fragment.Fragment()
    for vertex in [carbon] + hydrogens:
        expected_fragment.addVertex(vertex)
    for single_bond in single_bonds:
        expected_fragment.addEdge(single_bond)

    self.assertTrue(expected_fragment.isIsomorphic(fragment))
def setUp(self):
    """
    Fixture executed before every unit test in this class.

    Stores two separately constructed fragments, ``self.fragment1`` and
    ``self.fragment2``; each is a carbon atom bonded to an 'R' and an 'L'
    cutting label.
    """
    def build_fragment():
        # one carbon flanked by an 'R' and an 'L' cutting label
        carbon = Atom(element=getElement('C'), radicalElectrons=0, charge=0, lonePairs=0)
        label_r = afm.fragment.CuttingLabel('R')
        label_l = afm.fragment.CuttingLabel('L')
        edges = [Bond(carbon, label_r), Bond(carbon, label_l)]

        built = afm.fragment.Fragment()
        for vertex in (carbon, label_r, label_l):
            built.addVertex(vertex)
        for edge in edges:
            built.addEdge(edge)
        return built

    # construct the first fragment
    self.fragment1 = build_fragment()
    # construct the second fragment
    self.fragment2 = build_fragment()
def saturate(atoms):
    """
    Saturate the valency of every atom in `atoms` with hydrogen atoms.

    The list `atoms` is extended in place with the new hydrogens (nothing is
    returned), and the ``bonds`` mapping of each atom and of each new
    hydrogen is updated with the new single bond.

    The number of hydrogens added per atom is:

        max number of valence electrons - atom.radical_electrons
        - 2 * atom.lone_pairs - order - atom.charge

    Raises :class:`InvalidAdjacencyListError` when the atom's symbol has no
    entry in ``PeriodicSystem.valence_electrons`` or when the computed
    number of hydrogens is negative.
    """
    hydrogens = []
    for heavy in atoms:
        try:
            valence = PeriodicSystem.valence_electrons[heavy.symbol]
        except KeyError:
            raise InvalidAdjacencyListError(
                'Cannot add hydrogens to adjacency list: Unknown orbital for atom "{0}".'.format(heavy.symbol))

        bond_order = int(heavy.get_total_bond_order())
        missing = valence - heavy.radical_electrons - 2 * heavy.lone_pairs - bond_order - heavy.charge

        if missing < 0:
            raise InvalidAdjacencyListError('Incorrect electron configuration on atom.')

        for _ in range(missing):
            hydrogen = Atom(element='H', radical_electrons=0, charge=0, label='', lone_pairs=0)
            link = Bond(heavy, hydrogen, 'S')
            hydrogens.append(hydrogen)
            # register the bond on both ends
            heavy.bonds[hydrogen] = link
            hydrogen.bonds[heavy] = link

    # only extend after the loop so the new hydrogens are not visited
    atoms.extend(hydrogens)
def saturate_radicals(self):
    """
    Saturate the fragment by replacing all radicals with bonds to hydrogen
    atoms. Changes self molecule object.

    For every radical electron on a vertex a new hydrogen atom and a single
    bond to it are added to this fragment and the vertex's radical count is
    decremented. Afterwards the vertices are sorted, atom types are updated
    and ``self.multiplicity`` is set to 1.

    Returns:
        dict mapping each vertex that received hydrogens to a list of
        ``[hydrogen_atom, bond]`` pairs, one pair per hydrogen added.
    """
    added = {}
    # Iterate over a snapshot: addAtom() below presumably grows
    # self.vertices, and a container should not be mutated while it is
    # being iterated. The new hydrogens carry no radicals, so skipping
    # them does not change the result.
    for atom in list(self.vertices):
        for _ in range(atom.radicalElectrons):
            hydrogen = Atom('H', radicalElectrons=0, lonePairs=0, charge=0)
            bond = Bond(atom, hydrogen, 1)
            self.addAtom(hydrogen)
            self.addBond(bond)
            added.setdefault(atom, []).append([hydrogen, bond])
            atom.decrementRadical()

    # Update the atom types of the saturated structure (not sure why
    # this is necessary, because saturating with H shouldn't be
    # changing atom types, but it doesn't hurt anything and is not
    # very expensive, so will do it anyway)
    self.sortVertices()
    self.updateAtomTypes()
    self.multiplicity = 1

    return added
def update_molecule(mol, to_single_bonds=False):
    """
    Build a fresh copy of `mol` and recompute its atomTypes.

    Only the element of each atom is carried over to the copy. When
    `to_single_bonds` is True every bond in the copy gets order 1.0, which
    is handy for isomorphism comparisons; otherwise the numeric order of the
    original bond is used.

    Returns None when `mol` has no ``atoms`` attribute. A failure to assign
    atom types (``AtomTypeError``) is ignored.
    """
    new_mol = Molecule()
    try:
        atoms = mol.atoms
    except AttributeError:
        return None

    # original atom -> its counterpart in the copy
    atom_mapping = {}
    for old_atom in atoms:
        atom_mapping[old_atom] = new_mol.addAtom(Atom(old_atom.element))

    for old_atom in atoms:
        for neighbor in old_atom.bonds.keys():
            if to_single_bonds:
                bond_order = 1.0
            else:
                bond_order = old_atom.bonds[neighbor].getOrderNum()
            new_mol.addBond(Bond(atom_mapping[old_atom], atom_mapping[neighbor], bond_order))

    try:
        new_mol.updateAtomTypes()
    except AtomTypeError:
        pass

    new_mol.multiplicity = mol.multiplicity
    return new_mol
def testToAdjacencyListForNonIntegerBonds(self):
    """
    An adjacency list can be generated for a molecule whose bond order is
    not one of single, double, triple or benzene (here: 0.5).
    """
    from rmgpy.molecule.molecule import Atom, Bond, Molecule

    hydrogens = [Atom(element='H', lonePairs=0), Atom(element='H', lonePairs=0)]
    half_bond = Bond(hydrogens[0], hydrogens[1], 0.5)

    mol = Molecule(multiplicity=1)
    for hydrogen in hydrogens:
        mol.addAtom(hydrogen)
    mol.addBond(half_bond)

    adjlist = mol.toAdjacencyList()
    for expected in ('H', '{1,0.5}'):
        self.assertIn(expected, adjlist)
def test_update(self):
    """
    Test Fragment.update() on a CH2 centre carrying two 'R' cutting labels:
    the carbon must be typed 'Cs', the net charge must be 0 and the
    multiplicity must be 1.
    """
    atom_C = Atom(element=getElement('C'), radicalElectrons=0, charge=0, lonePairs=0)
    atom_H1 = Atom(element=getElement('H'), radicalElectrons=0, charge=0, lonePairs=0)
    atom_H2 = Atom(element=getElement('H'), radicalElectrons=0, charge=0, lonePairs=0)

    cutting_label_R1 = afm.fragment.CuttingLabel('R')
    cutting_label_R2 = afm.fragment.CuttingLabel('R')

    vertices = [
        atom_C,
        cutting_label_R1,
        cutting_label_R2,
        atom_H1,
        atom_H2,
    ]

    bonds = [
        Bond(atom_C, cutting_label_R1, 1),
        Bond(atom_C, cutting_label_R2, 1),
        Bond(atom_C, atom_H1, 1),
        Bond(atom_C, atom_H2, 1),
    ]

    fragment = afm.fragment.Fragment()
    for vertex in vertices:
        fragment.addVertex(vertex)
    for bond in bonds:
        fragment.addEdge(bond)

    fragment.update()

    # Find the carbon explicitly. The previous for/break search silently
    # fell through to the last vertex when no carbon was present, so the
    # atom type check could end up running on the wrong vertex.
    carbons = [
        vertex for vertex in fragment.vertices
        if isinstance(vertex, Atom) and vertex.isCarbon()
    ]
    self.assertTrue(carbons, 'no carbon atom found in the updated fragment')

    # assertEqual reports both values on failure, unlike assertTrue(a == b)
    self.assertEqual(carbons[0].atomType, atomTypes['Cs'])
    self.assertEqual(fragment.getNetCharge(), 0)
    self.assertEqual(fragment.multiplicity, 1)
def setUp(self):
    """
    Fixture executed before every unit test in this class: stores a carbon
    atom with one radical electron and label '*1' as ``self.atom``.
    """
    carbon = getElement('C')
    self.atom = Atom(
        element=carbon,
        radicalElectrons=1,
        charge=0,
        label='*1',
        lonePairs=0,
    )
def testIsSpecificCaseOf(self):
    """
    Check Atom.isSpecificCaseOf() for every pair drawn from the first ten
    entries of elementList: only an atom of the same element qualifies.
    """
    first_ten = elementList[0:10]
    for index1, element1 in enumerate(first_ten):
        for index2, element2 in enumerate(first_ten):
            atom1 = Atom(element=element1, radicalElectrons=1, charge=0, label='*1', lonePairs=0)
            atom2 = Atom(element=element2, radicalElectrons=1, charge=0, label='*1', lonePairs=0)
            outcome = atom1.isSpecificCaseOf(atom2)
            if index1 != index2:
                self.assertFalse(outcome)
            else:
                self.assertTrue(outcome)
def testSubgraphIsomorphism(self):
    """
    Test vf3.isSubgraphIsomorphic(graph, subgraph, None) on carbon chains,
    on a saturated six-membered ring and on benzene.
    """
    # Simple test comparing C-C to C-C-C (no hydrogens)
    mol = Molecule()

    c1 = Atom(getElement(6))
    c2 = c1.copy()

    mol.addAtom(c1)
    mol.addAtom(c2)
    mol.addBond(Bond(c1, c2))

    mol2 = Molecule()

    c1 = c1.copy()
    c2 = c1.copy()
    c3 = c1.copy()

    mol2.addAtom(c1)
    mol2.addAtom(c2)
    mol2.addAtom(c3)
    mol2.addBond(Bond(c1, c2))
    mol2.addBond(Bond(c2, c3))

    self.assertTrue(self.vf3.isSubgraphIsomorphic(mol2, mol, None))
    self.assertFalse(self.vf3.isSubgraphIsomorphic(mol, mol2, None))

    # Ring membership is a semantic property of molecules,
    # so straight chains are not considered sub graphs of rings.
    # (The SMILES below has a ring closure, i.e. it is cyclohexane.)
    cyclohexane = Molecule().fromSMILES("C1CCCCC1")

    self.assertFalse(self.vf3.isSubgraphIsomorphic(cyclohexane, mol, None))
    self.assertFalse(self.vf3.isSubgraphIsomorphic(cyclohexane, mol2, None))

    # Benzene and cyclohexane, while technically sharing the same shape,
    # differ in semantic information.
    benzene = Molecule().fromSMILES("C1=CC=CC=C1")

    self.assertFalse(self.vf3.isSubgraphIsomorphic(cyclohexane, benzene, None))

    # Test sub graph isomorphism on rings
    cyclohexaneMinusH = cyclohexane.copy(True)
    cyclohexaneMinusH.removeVertex(cyclohexaneMinusH.vertices[6])

    self.assertTrue(self.vf3.isSubgraphIsomorphic(cyclohexane, cyclohexaneMinusH, None))
    self.assertFalse(self.vf3.isSubgraphIsomorphic(cyclohexaneMinusH, cyclohexane, None))

    benzeneMinusH = benzene.copy(True)
    benzeneMinusH.removeVertex(benzeneMinusH.vertices[6])

    self.assertTrue(self.vf3.isSubgraphIsomorphic(benzene, benzeneMinusH, None))
    # The reverse direction must fail: the full benzene has one vertex more
    # than benzeneMinusH. This mirrors the cyclohexane check above and was
    # missing; the original comparison against cyclohexane is kept as well.
    self.assertFalse(self.vf3.isSubgraphIsomorphic(benzeneMinusH, benzene, None))
    self.assertFalse(self.vf3.isSubgraphIsomorphic(benzeneMinusH, cyclohexane, None))
def testIsOxygen(self):
    """
    Check Atom.isOxygen() against every element in elementList: it must be
    true for the element whose symbol is 'O' and false otherwise.
    """
    for element in elementList:
        atom = Atom(element=element, radicalElectrons=1, charge=0, label='*1', lonePairs=2)
        check = self.assertTrue if element.symbol == 'O' else self.assertFalse
        check(atom.isOxygen())
def s_bonds_mol_from_xyz(xyz):
    """
    Create a single bonded molecule from xyz using RMG's connectTheDots()

    Each non-blank line of `xyz` is split on whitespace; the first field is
    used as the element and the next three fields as the x, y, z
    coordinates. Blank and whitespace-only lines are skipped.

    Args:
        xyz: The xyz text (``str`` or ``unicode``).

    Returns:
        A tuple ``(mol, coordinates)`` where ``mol`` is the Molecule built by
        ``connectTheDots()`` and ``coordinates`` is a list of ``[x, y, z]``
        float lists, one per atom, in input order.

    Raises:
        SpeciesError: If `xyz` is not a string.
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If a coordinate field cannot be converted to float.
    """
    # Validate the input before building anything.
    if not isinstance(xyz, (str, unicode)):
        raise SpeciesError('xyz must be a string format, got: {0}'.format(type(xyz)))
    mol = Molecule()
    coordinates = list()
    for line in xyz.split('\n'):
        # Split once per line; an empty result also covers lines holding
        # only whitespace (e.g. a stray '\r'), which used to raise
        # IndexError.
        fields = line.split()
        if not fields:
            continue
        atom = Atom(element=str(fields[0]))
        coordinates.append([float(fields[1]), float(fields[2]), float(fields[3])])
        atom.coords = np.array(coordinates[-1], np.float64)
        mol.addAtom(atom)
    mol.connectTheDots()  # only adds single bonds, but we don't care
    return mol, coordinates
def testApplyActionLoseRadical(self):
    """
    Apply a LOSE_RADICAL action to a copy of an atom of every element and
    check that only the radical count changes (it drops by one).
    """
    action = ['LOSE_RADICAL', '*1', 1]
    for element in elementList:
        before = Atom(element=element, radicalElectrons=1, charge=0, label='*1', lonePairs=0)
        after = before.copy()
        after.applyAction(action)

        self.assertEqual(before.element, after.element)
        self.assertEqual(before.radicalElectrons, after.radicalElectrons + 1)
        for unchanged in ('charge', 'label'):
            self.assertEqual(getattr(before, unchanged), getattr(after, unchanged))
def testApplyActionDecrementBond(self):
    """
    Apply a CHANGE_BOND action with order change -1 to a copy of an atom of
    every element and check that element, radical count, charge and label
    are all left untouched.
    """
    action = ['CHANGE_BOND', '*1', -1, '*2']
    for element in elementList:
        before = Atom(element=element, radicalElectrons=1, charge=0, label='*1', lonePairs=0)
        after = before.copy()
        after.applyAction(action)

        for unchanged in ('element', 'radicalElectrons', 'charge', 'label'):
            self.assertEqual(getattr(before, unchanged), getattr(after, unchanged))
def get_representative_molecule(self, mode='minimal', update=True):
    """
    Build a Molecule that represents this fragment.

    In 'minimal' mode the fragment is copied via ``copyAndMap()`` and every
    CuttingLabel in the copy is replaced by a hydrogen atom bonded with the
    same bond order. When `update` is true, ``update()`` is called on the
    resulting molecule.

    Returns a tuple ``(mol_repr, mapping)`` where ``mapping`` is a dict with
    key: vertex of this fragment and value: the corresponding atom of
    ``mol_repr``. For any other `mode` nothing is built and None is
    returned.
    """
    if mode != 'minimal':
        # only the 'minimal' mode is handled here
        return None

    # create a molecule from fragment.vertices.copy
    mapping = self.copyAndMap()

    atoms = []
    for vertex in self.vertices:
        copied = mapping[vertex]
        if not isinstance(copied, CuttingLabel):
            atoms.append(copied)
            continue

        # replace cutting label with atom H
        hydrogen = Atom(element=getElement('H'), radicalElectrons=0, charge=0, lonePairs=0)
        for neighbor, old_bond in copied.edges.iteritems():
            replacement = Bond(neighbor, hydrogen, order=old_bond.order)
            neighbor.edges[hydrogen] = replacement
            del neighbor.edges[copied]
            hydrogen.edges[neighbor] = replacement

        # repoint the mapping at the hydrogen that took the label's place
        mapping[vertex] = hydrogen
        atoms.append(hydrogen)

    mol_repr = Molecule()
    mol_repr.atoms = atoms
    if update:
        mol_repr.update()

    return mol_repr, mapping
def toSMILES(self):
    """
    Return a SMILES string for this fragment.

    The fragment is deep-copied, every CuttingLabel in the copy is replaced
    by a silicon placeholder atom (3 radical electrons, label set to the
    cutting label's symbol) bonded with the same bond order, the result is
    converted with ``Molecule.toSMILES()``, and finally every '[Si]' token
    is removed from that string.

    NOTE(review): removing '[Si]' means the cutting labels do not appear in
    the returned string at all - confirm this is intended.
    """
    import re

    SMILES_before = self.copy(deep=True)
    final_vertices = []
    for atom in SMILES_before.atoms:
        if not isinstance(atom, CuttingLabel):
            final_vertices.append(atom)
            continue

        # stand-in atom that carries the cutting label's symbol as its label
        substi = Atom(element='Si')
        substi.label = atom.symbol

        # re-attach every neighbour of the cutting label to the stand-in
        for bondedAtom, bond in atom.edges.iteritems():
            new_bond = Bond(bondedAtom, substi, order=bond.order)
            bondedAtom.edges[substi] = new_bond
            del bondedAtom.edges[atom]
            substi.edges[bondedAtom] = new_bond

        substi.radicalElectrons = 3
        final_vertices.append(substi)

    SMILES_before.vertices = final_vertices
    mol_repr = Molecule()
    mol_repr.atoms = SMILES_before.vertices
    SMILES_after = mol_repr.toSMILES()

    # raw string: '\[' and '\]' are not valid escapes in a normal literal
    smiles = re.sub(r'\[Si\]', '', SMILES_after)

    return smiles
def fromRDKitMol(self, rdkitmol, atom_replace_dict=None):
    """
    Convert a RDKit Mol object `rdkitmol` to a molecular structure. Uses
    `RDKit <http://rdkit.org/>`_ to perform the conversion.
    This Kekulizes everything, removing all aromatic atom types.

    Args:
        rdkitmol: The RDKit molecule to convert. Hydrogens are added and the
            molecule is kekulized on a copy returned by ``Chem.AddHs``.
        atom_replace_dict: Optional mapping from a bracketed element symbol
            (e.g. ``'[Si]'``) to a cutting label name. Atoms whose symbol is
            found here become CuttingLabel vertices instead of atoms.
            ``None`` (the default) means no replacement.

    Returns:
        ``self``, with ``self.vertices`` rebuilt from `rdkitmol`.
    """
    from rdkit import Chem

    # The default used to be dereferenced directly, which crashed with
    # AttributeError whenever the argument was omitted.
    if atom_replace_dict is None:
        atom_replace_dict = {}

    # RDKit bond type name -> bond order; anything else stays at order 0
    bond_orders = {'SINGLE': 1, 'DOUBLE': 2, 'TRIPLE': 3, 'AROMATIC': 1.5}

    self.vertices = []

    # Add hydrogen atoms to complete molecule if needed
    rdkitmol.UpdatePropertyCache(strict=False)
    rdkitmol = Chem.AddHs(rdkitmol)
    Chem.rdmolops.Kekulize(rdkitmol, clearAromaticFlags=True)

    # iterate through atoms in rdkitmol
    for i in range(rdkitmol.GetNumAtoms()):
        rdkitatom = rdkitmol.GetAtomWithIdx(i)

        # Use atomic number as key for element
        number = rdkitatom.GetAtomicNum()
        element = getElement(number)

        # Process charge
        charge = rdkitatom.GetFormalCharge()
        radicalElectrons = rdkitatom.GetNumRadicalElectrons()

        replace_key = '[' + element.symbol + ']'
        # `in` replaces dict.has_key(), which no longer exists in Python 3
        if replace_key in atom_replace_dict:
            cutting_label_name = atom_replace_dict[replace_key]
            cutting_label = CuttingLabel(name=cutting_label_name)
            self.vertices.append(cutting_label)
        else:
            atom = Atom(element, radicalElectrons, charge, '', 0)
            self.vertices.append(atom)

        # Add bonds between this vertex and every earlier one
        for j in range(0, i):
            rdkitbond = rdkitmol.GetBondBetweenAtoms(i, j)
            if rdkitbond is not None:
                # Process bond type
                order = bond_orders.get(rdkitbond.GetBondType().name, 0)
                bond = Bond(self.vertices[i], self.vertices[j], order)
                self.addBond(bond)

    # We need to update lone pairs first because the charge was set by RDKit
    self.updateLonePairs()
    # Set atom types and connectivity values
    self.update()

    # Assume this is always true
    # There are cases where 2 radicalElectrons is a singlet, but
    # the triplet is often more stable,
    self.updateMultiplicity()
    # mol.updateAtomTypes()

    return self
def get_resonance_hybrid(self):
    """
    Returns a molecule object with bond orders that are the average
    of all the resonance structures.

    The resonance structures are generated first via
    ``generate_resonance_structures(keep_isomorphic=True)`` and only those
    flagged ``reactive`` are averaged. If exactly one reactive structure
    exists it is returned as is. Otherwise a new Molecule is built whose
    atoms copy the element and ``id`` of the first structure's atoms, whose
    bond orders are the mean over all structures, and whose radical count
    per atom is the maximum found in any structure.

    Side effect: the ``atoms`` list of every reactive structure is sorted in
    place by atom ``id``.
    """
    # get labeled resonance isomers
    self.generate_resonance_structures(keep_isomorphic=True)

    # only consider reactive molecules as representative structures
    molecules = [mol for mol in self.molecule if mol.reactive]

    # return if no resonance
    if len(molecules) == 1:
        return molecules[0]

    # create a sorted list of atom objects for each resonance structure
    # NOTE(review): most names declared here (atomsFromStructures, oldAtoms,
    # newAtoms, numResonanceStructures, oldBondOrder, newMol) do not match
    # the snake_case locals actually used below, so those locals are
    # presumably left untyped when compiled - confirm.
    cython.declare(
        atomsFromStructures=list,
        oldAtoms=list,
        newAtoms=list,
        numResonanceStructures=cython.short,
        structureNum=cython.short,
        oldBondOrder=cython.float,
        index1=cython.short,
        index2=cython.short,
        newMol=Molecule,
        oldMol=Molecule,
        atom1=Atom,
        atom2=Atom,
        bond=Bond,
        atoms=list,
    )

    atoms_from_structures = []
    for new_mol in molecules:
        # sorting by id puts the atoms of every structure in the same
        # order, so one index addresses the same atom in each structure
        new_mol.atoms.sort(key=lambda atom: atom.id)
        atoms_from_structures.append(new_mol.atoms)

    num_resonance_structures = len(molecules)

    # make original structure with no bonds
    new_mol = Molecule()
    original_atoms = atoms_from_structures[0]
    for atom1 in original_atoms:
        atom = new_mol.add_atom(Atom(atom1.element))
        atom.id = atom1.id

    new_atoms = new_mol.atoms

    # initialize bonds to zero order
    for index1, atom1 in enumerate(original_atoms):
        for atom2 in atom1.bonds:
            index2 = original_atoms.index(atom2)
            bond = Bond(new_atoms[index1], new_atoms[index2], 0)
            new_mol.add_bond(bond)

    # set bonds to the proper value
    for structureNum, oldMol in enumerate(molecules):
        old_atoms = atoms_from_structures[structureNum]

        for index1, atom1 in enumerate(old_atoms):
            # make bond orders average of resonance structures
            for atom2 in atom1.bonds:
                index2 = old_atoms.index(atom2)

                new_bond = new_mol.get_bond(new_atoms[index1], new_atoms[index2])
                old_bond_order = oldMol.get_bond(
                    old_atoms[index1], old_atoms[index2]).get_order_num()
                # the extra "/ 2" presumably compensates for every bond
                # being reached from both of its end atoms - TODO confirm
                new_bond.apply_action(
                    ('CHANGE_BOND', None,
                     old_bond_order / num_resonance_structures / 2))
            # set radicals in resonance hybrid to maximum of all structures
            if atom1.radical_electrons > 0:
                new_atoms[index1].radical_electrons = max(
                    atom1.radical_electrons,
                    new_atoms[index1].radical_electrons)
    new_mol.update_atomtypes(log_species=False, raise_exception=False)
    return new_mol
def from_adjacency_list(adjlist, group=False, saturate_h=False):
    """
    Convert a string adjacency list `adjlist` into a set of :class:`Atom` and
    :class:`Bond` objects.

    Args:
        adjlist: The adjacency list text. An optional first line holding a
            single token is treated as a label and dropped; an optional
            ``multiplicity`` line may follow; every remaining non-blank line
            describes one atom: index, optional ``*label``, atom type,
            ``u`` (unpaired electrons), optional ``p`` (lone pairs), ``c``
            (charge), ``i`` (isotope) and ``r`` (in-ring flag) fields, then
            the bond list.
        group: When true, :class:`GroupAtom` / :class:`GroupBond` objects
            are built and list / wildcard (``x``) values are accepted; when
            false, :class:`Atom` / :class:`Bond` objects are built and such
            values raise.
        saturate_h: When true and `group` is false, ``Saturator.saturate``
            is called on the parsed atoms.

    Returns:
        A tuple ``(atoms, multiplicity)``. For molecules the multiplicity is
        the integer from the multiplicity line or, when that line is absent,
        ``2 * (n_rad * 0.5) + 1`` (a float) computed from the total number
        of radical electrons. For groups it is a list of ints, or None when
        the line is absent or is the wildcard form.

        If the last line matches ``re_intermediate_adjlist`` or
        ``re_old_adjlist`` the whole input is handed to
        :func:`from_old_adjacency_list` and its result is returned instead.

    Raises:
        InvalidAdjacencyListError: For empty input, malformed fields,
            whitespace inside braces, self-bonds, or inconsistent bond
            entries. Further errors may come from the ``ConsistencyChecker``
            calls made for molecules.
    """
    atoms = []
    atom_dict = {}  # atom index from the file -> atom object
    bonds = {}  # atom index -> {neighbour index -> list of bond order strings}
    multiplicity = None

    adjlist = adjlist.strip()
    lines = adjlist.splitlines()
    if adjlist == '' or len(lines) == 0:
        raise InvalidAdjacencyListError('Empty adjacency list.')

    # Detect old-style adjacency lists by looking at the last line's syntax
    last_line = lines[-1].strip()
    while not last_line:  # Remove any empty lines from the end
        lines.pop()
        last_line = lines[-1].strip()
    if re_intermediate_adjlist.match(last_line):
        logging.debug(
            "adjacency list:\n{1}\nline '{0}' looks like an intermediate style "
            "adjacency list".format(last_line, adjlist))
        return from_old_adjacency_list(adjlist, group=group, saturate_h=saturate_h)
    if re_old_adjlist.match(last_line):
        logging.debug(
            "Adjacency list:\n{1}\nline '{0}' looks like an old style adjacency list"
            .format(last_line, adjlist))
        if not group:
            logging.debug("Will assume implicit H atoms")
        # old-style molecules always get saturated, whatever `saturate_h` says
        return from_old_adjacency_list(adjlist, group=group, saturate_h=(not group))

    # Interpret the first line if it contains a label
    # (the label is removed from `lines`; its value is not used afterwards)
    if len(lines[0].split()) == 1:
        label = lines.pop(0)
        if len(lines) == 0:
            raise InvalidAdjacencyListError(
                'No atoms specified in adjacency list.')

    # Interpret the second line if it contains a multiplicity
    if lines[0].split()[0] == 'multiplicity':
        line = lines.pop(0)
        if group:
            match = re.match(
                r'\s*multiplicity\s+\[\s*(\d(?:,\s*\d)*)\s*\]\s*$', line)
            if not match:
                # not a list: only the wildcard form is acceptable, in which
                # case `multiplicity` stays None
                rematch = re.match(r'\s*multiplicity\s+x\s*$', line)
                if not rematch:
                    raise InvalidAdjacencyListError(
                        "Invalid multiplicity line '{0}'. Should be a list like "
                        "'multiplicity [1,2,3]' or a wildcard 'multiplicity x'"
                        .format(line))
            else:
                # should match "multiplicity [1]" or " multiplicity   [ 1, 2, 3 ]" or " multiplicity [1,2,3]"
                # and whatever's inside the [] (excluding leading and trailing spaces) should be captured as group 1.
                # If a wildcard is desired, this line can be omitted or replaced with 'multiplicity x'
                # Multiplicities must be only one digit (i.e. less than 10)
                # The (?:,\s*\d)* matches patterns like ", 2" 0 or more times, but doesn't capture them
                # (because of the leading ?:)
                multiplicities = match.group(1).split(',')
                multiplicity = [int(i) for i in multiplicities]
        else:
            match = re.match(r'\s*multiplicity\s+\d+\s*$', line)
            if not match:
                raise InvalidAdjacencyListError(
                    "Invalid multiplicity line '{0}'. Should be an integer like "
                    "'multiplicity 2'".format(line))
            multiplicity = int(line.split()[1])
        if len(lines) == 0:
            raise InvalidAdjacencyListError(
                'No atoms specified in adjacency list: \n{0}'.format(adjlist))

    # matches a {...} group that contains whitespace
    mistake1 = re.compile(r'\{[^}]*\s+[^}]*\}')

    # Iterate over the remaining lines, generating Atom or GroupAtom objects
    for line in lines:

        # Sometimes people put spaces after commas, which messes up the
        # parse-by-whitespace. Examples include '[Cd, Ct]'.
        if mistake1.search(line):
            raise InvalidAdjacencyListError(
                "{1} Shouldn't have spaces inside braces:\n{0}".format(
                    mistake1.search(line).group(), adjlist))

        # Sometimes commas are used to delimit bonds in the bond list,
        # so replace them just in case
        line = line.replace('},{', '} {')

        data = line.split()

        # Skip if blank line
        if len(data) == 0:
            continue

        # First item is index for atom
        # Sometimes these have a trailing period (as if in a numbered list),
        # so remove it just in case
        aid = int(data[0].strip('.'))

        # If second item starts with '*', then atom is labeled
        label = ''
        index = 1  # position in `data` of the next field to interpret
        if data[1][0] == '*':
            label = data[1]
            index += 1

        # Next is the element or functional group element
        # A list can be specified with the [,] syntax (groups only)
        atom_type = data[index]
        if atom_type[0] == '[':
            if not group:
                raise InvalidAdjacencyListError(
                    "Error on:\n{0}\nA molecule should not assign more than one "
                    "atomtype per atom.".format(adjlist))
            atom_type = atom_type[1:-1].split(',')
        else:
            atom_type = [atom_type]
        index += 1

        # Next the number of unpaired electrons (mandatory 'u' field)
        unpaired_electrons = []
        u_state = data[index]
        if u_state[0] == 'u':
            if u_state[1] == '[':
                u_state = u_state[2:-1].split(',')
            else:
                # only the single character after 'u' is read
                u_state = [u_state[1]]
            for u in u_state:
                if u == '0':
                    unpaired_electrons.append(0)
                elif u == '1':
                    unpaired_electrons.append(1)
                elif u == '2':
                    unpaired_electrons.append(2)
                elif u == '3':
                    unpaired_electrons.append(3)
                elif u == '4':
                    unpaired_electrons.append(4)
                elif u == 'x':
                    # wildcard: nothing is appended for groups
                    if not group:
                        raise InvalidAdjacencyListError(
                            "Error on:\n{0}\nA molecule should not assign a wildcard to "
                            "number of unpaired electrons.".format(adjlist))
                else:
                    raise InvalidAdjacencyListError(
                        'Number of unpaired electrons not recognized on\n{0}.'.
                        format(adjlist))
            index += 1
        else:
            raise InvalidAdjacencyListError(
                'Number of unpaired electrons not defined on\n{0}.'.format(
                    adjlist))

        # Next the number of lone electron pairs (if provided)
        # For molecules a missing field defaults to 0; for groups the list
        # is left empty.
        lone_pairs = []
        if len(data) > index:
            lp_state = data[index]
            if lp_state[0] == 'p':
                if lp_state[1] == '[':
                    lp_state = lp_state[2:-1].split(',')
                else:
                    # only the single character after 'p' is read
                    lp_state = [lp_state[1]]
                for lp in lp_state:
                    if lp == '0':
                        lone_pairs.append(0)
                    elif lp == '1':
                        lone_pairs.append(1)
                    elif lp == '2':
                        lone_pairs.append(2)
                    elif lp == '3':
                        lone_pairs.append(3)
                    elif lp == '4':
                        lone_pairs.append(4)
                    elif lp == 'x':
                        if not group:
                            raise InvalidAdjacencyListError(
                                "Error in adjacency list:\n{0}\nA molecule should not have "
                                "a wildcard assigned to number of lone pairs.".
                                format(adjlist))
                    else:
                        raise InvalidAdjacencyListError(
                            'Error in adjacency list:\n{0}\nNumber of lone electron pairs '
                            'not recognized.'.format(adjlist))
                index += 1
            else:
                if not group:
                    lone_pairs.append(0)
        else:
            if not group:
                lone_pairs.append(0)

        # Next the number of partial charges (if provided)
        # Same defaulting rule as for lone pairs.
        partial_charges = []
        if len(data) > index:
            e_state = data[index]
            if e_state[0] == 'c':
                if e_state[1] == '[':
                    e_state = e_state[2:-1].split(',')
                else:
                    # everything after 'c' is read, e.g. '+1' or '-2'
                    e_state = [e_state[1:]]
                for e in e_state:
                    if e == '0':
                        partial_charges.append(0)
                    elif e == '+1':
                        partial_charges.append(1)
                    elif e == '+2':
                        partial_charges.append(2)
                    elif e == '+3':
                        partial_charges.append(3)
                    elif e == '+4':
                        partial_charges.append(4)
                    elif e == '-1':
                        partial_charges.append(-1)
                    elif e == '-2':
                        partial_charges.append(-2)
                    elif e == '-3':
                        partial_charges.append(-3)
                    elif e == '-4':
                        partial_charges.append(-4)
                    elif e == 'x':
                        if not group:
                            raise InvalidAdjacencyListError(
                                "Error on adjacency list:\n{0}\nA molecule should not have "
                                "a wildcard assigned to number of charges.".
                                format(adjlist))
                    else:
                        raise InvalidAdjacencyListError(
                            'Error on adjacency list:\n{0}\nNumber of partial charges '
                            'not recognized.'.format(adjlist))
                index += 1
            else:
                if not group:
                    partial_charges.append(0)
        else:
            if not group:
                partial_charges.append(0)

        # Next the isotope (if provided); -1 means "not specified"
        isotope = -1
        if len(data) > index:
            i_state = data[index]
            if i_state[0] == 'i':
                isotope = int(i_state[1:])
                index += 1

        # Next ring membership info (if provided)
        props = {}
        if len(data) > index:
            r_state = data[index]
            if r_state[0] == 'r':
                props['inRing'] = bool(int(r_state[1]))
                index += 1

        # Create a new atom based on the above information
        if group:
            atom = GroupAtom(atom_type, unpaired_electrons, partial_charges,
                             label, lone_pairs, props)
        else:
            atom = Atom(atom_type[0], unpaired_electrons[0],
                        partial_charges[0], label, lone_pairs[0])
            if isotope != -1:
                atom.element = get_element(atom.number, isotope)

        # Add the atom to the list
        atoms.append(atom)
        atom_dict[aid] = atom

        # Process list of bonds (all remaining fields, each like '{2,S}')
        bonds[aid] = {}
        for datum in data[index:]:

            # Sometimes commas are used to delimit bonds in the bond list,
            # so strip them just in case
            datum = datum.strip(',')

            # drop the surrounding braces, then split "index,order"
            aid2, comma, order = datum[1:-1].partition(',')
            aid2 = int(aid2)
            if aid == aid2:
                raise InvalidAdjacencyListError(
                    'Error in adjacency list:\n{1}\nAttempted to create a bond between '
                    'atom {0:d} and itself.'.format(aid, adjlist))

            if order[0] == '[':
                order = order[1:-1].split(',')
            else:
                order = [order]

            bonds[aid][aid2] = order

    # Check consistency using bonddict: every bond must be listed on both
    # atoms, with the same order(s)
    for atom1 in bonds:
        for atom2 in bonds[atom1]:
            if atom2 not in bonds:
                raise InvalidAdjacencyListError(
                    'Error in adjacency list:\n{1}\nAtom {0:d} not in bond '
                    'dictionary.'.format(atom2, adjlist))
            elif atom1 not in bonds[atom2]:
                raise InvalidAdjacencyListError(
                    'Error in adjacency list:\n{2}\nFound bond between {0:d} and {1:d}, '
                    'but not the reverse.'.format(atom1, atom2, adjlist))
            elif bonds[atom1][atom2] != bonds[atom2][atom1]:
                raise InvalidAdjacencyListError(
                    'Error in adjacency list:\n{4}\nFound bonds between {0:d} and {1:d}, but of different orders '
                    '"{2}" and "{3}".'.format(atom1, atom2,
                                              bonds[atom1][atom2],
                                              bonds[atom2][atom1], adjlist))

    # Convert bonddict to use Atom[group] and Bond[group] objects
    atomkeys = list(atom_dict.keys())
    atomkeys.sort()
    for aid1 in atomkeys:
        atomkeys2 = list(bonds[aid1].keys())
        atomkeys2.sort()
        for aid2 in atomkeys2:
            # each pair is handled once, from its lower-index atom
            if aid1 < aid2:
                atom1 = atom_dict[aid1]
                atom2 = atom_dict[aid2]
                order = bonds[aid1][aid2]
                if group:
                    bond = GroupBond(atom1, atom2, order)
                elif len(order) == 1:
                    bond = Bond(atom1, atom2, order[0])
                else:
                    raise InvalidAdjacencyListError(
                        'Error in adjacency list:\n{0}\nMultiple bond orders specified for '
                        'an atom in a Molecule.'.format(adjlist))
                atom1.edges[atom2] = bond
                atom2.edges[atom1] = bond

    if saturate_h:
        # Add explicit hydrogen atoms to complete structure if desired
        if not group:
            Saturator.saturate(atoms)

    # Consistency checks
    if not group:
        # Molecule consistency check
        # Electron and valency consistency check for each atom
        for atom in atoms:
            ConsistencyChecker.check_partial_charge(atom)

        n_rad = sum([atom.radical_electrons for atom in atoms])
        absolute_spin_per_electron = 1 / 2.
        if multiplicity is None:
            # no multiplicity line: derive it from the radical count
            # (the result is a float because of the 1 / 2. factor)
            multiplicity = 2 * (n_rad * absolute_spin_per_electron) + 1

        ConsistencyChecker.check_multiplicity(n_rad, multiplicity)
        for atom in atoms:
            ConsistencyChecker.check_hund_rule(atom, multiplicity)
        return atoms, multiplicity
    else:
        # Currently no group consistency check
        return atoms, multiplicity
def assign_representative_molecule(self):
    """
    Build a representative Molecule for this fragment and store it on
    ``self.mol_repr``.

    The fragment is copied via ``copyAndMap()`` and every CuttingLabel in
    the copy is replaced by an ethyl-like group: a carbon bonded with the
    label's bond order to the label's neighbours, carrying two hydrogens
    and a second carbon that carries three hydrogens. ``update()`` is then
    called on the molecule.

    Returns the mapping produced by ``copyAndMap()``.
    NOTE(review): unlike get_representative_molecule, the mapping entry of a
    replaced cutting label is not repointed at the new carbon - confirm
    whether callers rely on that.
    """
    def new_atom(symbol):
        # neutral atom without radicals or lone pairs
        return Atom(element=getElement(symbol), radicalElectrons=0, charge=0, lonePairs=0)

    # create a molecule from fragment.vertices.copy
    mapping = self.copyAndMap()

    atoms = []
    extra_atoms = []
    extra_bonds = []
    for vertex in self.vertices:
        copied = mapping[vertex]
        if not isinstance(copied, CuttingLabel):
            atoms.append(copied)
            continue

        # replace cutting label with atom C
        inner_carbon = new_atom('C')
        for neighbor, old_bond in copied.edges.iteritems():
            replacement = Bond(neighbor, inner_carbon, order=old_bond.order)
            neighbor.edges[inner_carbon] = replacement
            del neighbor.edges[copied]
            inner_carbon.edges[neighbor] = replacement

        # add hydrogens and carbon to make it CC
        inner_h1 = new_atom('H')
        inner_h2 = new_atom('H')
        outer_carbon = new_atom('C')
        outer_h1 = new_atom('H')
        outer_h2 = new_atom('H')
        outer_h3 = new_atom('H')

        atoms.append(inner_carbon)
        extra_atoms.extend(
            [inner_h1, inner_h2, outer_h1, outer_h2, outer_h3, outer_carbon])
        extra_bonds.extend([
            Bond(inner_carbon, inner_h1, 1),
            Bond(inner_carbon, inner_h2, 1),
            Bond(outer_carbon, outer_h1, 1),
            Bond(outer_carbon, outer_h2, 1),
            Bond(outer_carbon, outer_h3, 1),
            Bond(inner_carbon, outer_carbon, 1),
        ])

    mol_repr = Molecule()
    mol_repr.atoms = atoms
    for extra_atom in extra_atoms:
        mol_repr.addAtom(extra_atom)
    for extra_bond in extra_bonds:
        mol_repr.addBond(extra_bond)

    # update connectivity
    mol_repr.update()

    # keep the representative molecule on the fragment
    self.mol_repr = mol_repr

    return mapping
def from_old_adjacency_list(adjlist, group=False, saturate_h=False):
    """
    Convert a pre-June-2014 string adjacency list `adjlist` into a set of :class:`Atom` and
    :class:`Bond` objects.
    It can read both "old style" that existed for years, and the "intermediate style" that
    existed for a few months in 2014, with the extra column of integers for lone pairs.

    Each atom line is parsed by whitespace as:
    ``<index> [*label] <atomtype> <electron state> [lone pairs] {<index>,<order>} ...``

    Args:
        adjlist: The adjacency list as a string. If its first line consists of a single
            whitespace-delimited token, that line is treated as a label and skipped.
        group: If ``True``, :class:`GroupAtom` / :class:`GroupBond` objects are built and
            wildcard / unspecific electron states are accepted; otherwise :class:`Atom` /
            :class:`Bond` objects are built.
        saturate_h: If ``True`` (and `group` is ``False``), explicit hydrogen atoms are
            appended to fill each atom's valence.

    Returns:
        A tuple ``(atoms, multiplicity)``. ``atoms`` is the list of created atom objects
        (bonds are stored in their ``edges``). ``multiplicity`` is the total number of
        radical electrons plus one for molecules and ``None`` for groups.

    Raises:
        InvalidAdjacencyListError: If the adjacency list is empty, has no atom lines,
            contains spaces inside braces, uses an unspecific electron state for a
            molecule, bonds an atom to itself, has inconsistent bond definitions, or
            needs hydrogens for an element of unknown valence. The adjacency list is
            logged with ``logging.error`` before the exception is re-raised.
    """
    atoms = []
    atomdict = {}  # atom index from the adjlist -> created atom object
    bonds = {}  # atom index -> {neighbour index -> list of bond order strings}

    try:
        adjlist = adjlist.strip()
        lines = adjlist.splitlines()
        if adjlist == '' or len(lines) == 0:
            raise InvalidAdjacencyListError('Empty adjacency list.')

        # Skip the first line if it contains a label
        if len(lines[0].split()) == 1:
            # NOTE(review): the popped label is not used afterwards in this function.
            label = lines.pop(0)
            if len(lines) == 0:
                raise InvalidAdjacencyListError(
                    """Error in adjacency list\n{0}\nNo atoms specified.""".format(adjlist))

        # Matches a brace group that contains whitespace, e.g. '{Cd, Ct}'
        mistake1 = re.compile(r'\{[^}]*\s+[^}]*\}')
        # these are no longer stored on atoms, so we make a separate dictionary
        # NOTE(review): this dictionary is never read or written again in this function.
        atomic_multiplicities = {}

        # Iterate over the remaining lines, generating Atom or GroupAtom objects
        for line in lines:

            # Sometimes people put spaces after commas, which messes up the
            # parse-by-whitespace. Examples include '{Cd, Ct}'.
            if mistake1.search(line):
                raise InvalidAdjacencyListError(
                    "Error in adjacency list: \n{1}\nspecies shouldn't have spaces inside "
                    "braces: {0}".format(mistake1.search(line).group(), adjlist))

            # Sometimes commas are used to delimit bonds in the bond list,
            # so replace them just in case
            line = line.replace('},{', '} {')

            data = line.split()

            # Skip if blank line
            if len(data) == 0:
                continue

            # First item is index for atom
            # Sometimes these have a trailing period (as if in a numbered list),
            # so remove it just in case
            aid = int(data[0].strip('.'))

            # If second item starts with '*', then atom is labeled
            # NOTE(review): a line with fewer tokens than expected fails with IndexError
            # here or below rather than InvalidAdjacencyListError.
            label = ''
            index = 1
            if data[1][0] == '*':
                label = data[1]
                index += 1

            # Next is the element or functional group element
            # A list can be specified with the {,} syntax
            atom_type = data[index]
            if atom_type[0] == '{':
                atom_type = atom_type[1:-1].split(',')
            else:
                atom_type = [atom_type]
            index += 1

            # Next is the electron state
            # The two lists are filled in parallel: entry i of `additional_lone_pairs`
            # belongs to entry i of `radical_electrons`.
            radical_electrons = []
            additional_lone_pairs = []
            elec_state = data[index].upper()
            if elec_state[0] == '{':
                elec_state = elec_state[1:-1].split(',')
            else:
                elec_state = [elec_state]
            # NOTE(review): str.split(',') always returns at least one element, so this
            # guard looks unreachable.
            if len(elec_state) == 0:
                raise InvalidAdjacencyListError(
                    "Error in adjacency list:\n{0}\nThere must be some electronic state defined for an "
                    "old adjlist".format(adjlist))
            # NOTE(review): an electron state matching none of the branches below appends
            # nothing; for a molecule that makes `radical_electrons[0]` further down raise
            # IndexError.
            for e in elec_state:
                if e == '0':
                    radical_electrons.append(0)
                    additional_lone_pairs.append(0)
                elif e == '1':
                    radical_electrons.append(1)
                    additional_lone_pairs.append(0)
                elif e == '2':
                    if not group:
                        raise InvalidAdjacencyListError(
                            "Error in adjacency list:\n{0}\nNumber of radical electrons = 2 is not specific enough. "
                            "Please use 2S or 2T.".format(adjlist))
                    # includes 2S and 2T
                    radical_electrons.append(0)
                    additional_lone_pairs.append(1)
                    radical_electrons.append(2)
                    additional_lone_pairs.append(0)
                elif e == '2S':
                    radical_electrons.append(0)
                    additional_lone_pairs.append(1)
                elif e == '2T':
                    radical_electrons.append(2)
                    additional_lone_pairs.append(0)
                elif e == '3':
                    if not group:
                        raise InvalidAdjacencyListError(
                            "Error in adjacency list:\n{0}\nNumber of radical electrons = 3 is not specific enough. "
                            "Please use 3D or 3Q.".format(adjlist))
                    # includes 3D and 3Q
                    radical_electrons.append(1)
                    additional_lone_pairs.append(1)
                    radical_electrons.append(3)
                    additional_lone_pairs.append(0)
                elif e == '3D':
                    radical_electrons.append(1)
                    additional_lone_pairs.append(1)
                elif e == '3Q':
                    radical_electrons.append(3)
                    additional_lone_pairs.append(0)
                elif e == '4':
                    if not group:
                        raise InvalidAdjacencyListError(
                            "Error in adjacency list:\n{0}\nNumber of radical electrons = 4 is not specific enough. "
                            "Please use 4S, 4T, or 4V.".format(adjlist))
                    # includes 4S, 4T, and 4V
                    radical_electrons.append(0)
                    additional_lone_pairs.append(2)
                    radical_electrons.append(2)
                    additional_lone_pairs.append(1)
                    radical_electrons.append(4)
                    additional_lone_pairs.append(0)
                elif e == '4S':
                    radical_electrons.append(0)
                    additional_lone_pairs.append(2)
                elif e == '4T':
                    radical_electrons.append(2)
                    additional_lone_pairs.append(1)
                elif e == '4V':
                    radical_electrons.append(4)
                    additional_lone_pairs.append(0)
                elif e == 'X':
                    if not group:
                        raise InvalidAdjacencyListError(
                            "Error in adjacency list:\n{0}\nNumber of radical electrons = X is not specific enough. "
                            "Wildcards should only be used for groups.".format(adjlist))
                    # Wildcard: discard whatever radical counts were collected so far
                    radical_electrons = []
            index += 1

            # Next number defines the number of lone electron pairs (if provided)
            lone_pairs_of_electrons = None
            if len(data) > index:
                lp_state = data[index]
                if lp_state[0] == '{':
                    # this is the start of the chemical bonds - no lone pair info was provided
                    lone_pairs_of_electrons = None
                else:
                    # Only '0'..'4' are recognised; any other token leaves the value as
                    # None but is still consumed by the index increment below.
                    if lp_state == '0':
                        lone_pairs_of_electrons = 0
                    if lp_state == '1':
                        lone_pairs_of_electrons = 1
                    if lp_state == '2':
                        lone_pairs_of_electrons = 2
                    if lp_state == '3':
                        lone_pairs_of_electrons = 3
                    if lp_state == '4':
                        lone_pairs_of_electrons = 4
                    index += 1
            else:  # no bonds or lone pair info provided.
                lone_pairs_of_electrons = None

            # Create a new atom based on the above information
            if group:
                if lone_pairs_of_electrons is not None:
                    # One lone-pair count per electron state collected above
                    lone_pairs_of_electrons = [
                        additional + lone_pairs_of_electrons for additional in additional_lone_pairs
                    ]
                atom = GroupAtom(
                    atomtype=atom_type,
                    radical_electrons=sorted(set(radical_electrons)),
                    charge=None,
                    label=label,
                    lone_pairs=lone_pairs_of_electrons,
                    # Assign lone_pairs_of_electrons as None if it is not explicitly provided
                )
            else:
                if lone_pairs_of_electrons is not None:
                    # Intermediate adjlist representation
                    lone_pairs_of_electrons = lone_pairs_of_electrons + additional_lone_pairs[0]
                else:
                    # Add the standard number of lone pairs with the additional lone pairs
                    lone_pairs_of_electrons = PeriodicSystem.lone_pairs[atom_type[0]] + additional_lone_pairs[0]
                atom = Atom(
                    element=atom_type[0],
                    radical_electrons=radical_electrons[0],
                    charge=0,
                    label=label,
                    lone_pairs=lone_pairs_of_electrons,
                )

            # Add the atom to the list
            atoms.append(atom)
            atomdict[aid] = atom

            # Process list of bonds
            bonds[aid] = {}
            for datum in data[index:]:

                # Sometimes commas are used to delimit bonds in the bond list,
                # so strip them just in case
                datum = datum.strip(',')

                # Drop the surrounding braces, then split '<index>,<order>' at the first comma
                aid2, comma, order = datum[1:-1].partition(',')
                aid2 = int(aid2)
                if aid == aid2:
                    raise InvalidAdjacencyListError(
                        'Error in adjacency list:\n{1}\nAttempted to create a bond between '
                        'atom {0:d} and itself.'.format(aid, adjlist))

                # NOTE(review): a bond entry without an order leaves `order` empty, so
                # `order[0]` raises IndexError.
                if order[0] == '{':
                    order = order[1:-1].split(',')
                else:
                    order = [order]

                bonds[aid][aid2] = order

        # Check consistency using bonddict
        # Every bond has to be listed on both atoms, with the same order(s) each time
        for atom1 in bonds:
            for atom2 in bonds[atom1]:
                if atom2 not in bonds:
                    raise InvalidAdjacencyListError(
                        'Error in adjacency list:\n{1}\nAtom {0:d} not in bond dictionary.'
                        .format(atom2, adjlist))
                elif atom1 not in bonds[atom2]:
                    raise InvalidAdjacencyListError(
                        'Error in adjacency list:\n{2}\nFound bond between {0:d} and {1:d}, '
                        'but not the reverse'.format(atom1, atom2, adjlist))
                elif bonds[atom1][atom2] != bonds[atom2][atom1]:
                    raise InvalidAdjacencyListError(
                        'Error in adjacency list: \n{4}\nFound bonds between {0:d} and {1:d}, but of different orders '
                        '"{2}" and "{3}".'.format(atom1, atom2, bonds[atom1][atom2], bonds[atom2][atom1], adjlist))

        # Convert bonddict to use Atom[group] and Bond[group] objects
        atomkeys = list(atomdict.keys())
        atomkeys.sort()
        for aid1 in atomkeys:
            atomkeys2 = list(bonds[aid1].keys())
            atomkeys2.sort()
            for aid2 in atomkeys2:
                # Each bond appears twice in `bonds`; create the object only once
                if aid1 < aid2:
                    atom1 = atomdict[aid1]
                    atom2 = atomdict[aid2]
                    order = bonds[aid1][aid2]
                    if group:
                        bond = GroupBond(atom1, atom2, order)
                    elif len(order) == 1:
                        bond = Bond(atom1, atom2, order[0])
                    else:
                        raise InvalidAdjacencyListError(
                            'Error in adjacency list:\n{0}\nMultiple bond orders specified '
                            'for an atom.'.format(adjlist))
                    atom1.edges[atom2] = bond
                    atom2.edges[atom1] = bond

        if not group:
            if saturate_h:
                # Add explicit hydrogen atoms to complete structure if desired
                new_atoms = []
                for atom in atoms:
                    try:
                        valence = PeriodicSystem.valences[atom.symbol]
                    except KeyError:
                        raise InvalidAdjacencyListError(
                            'Error in adjacency list:\n{1}\nCannot add hydrogens: Unknown '
                            'valence for atom "{0}".'.format(atom.symbol, adjlist))
                    radical = atom.radical_electrons
                    order = atom.get_total_bond_order()
                    # Open valence left after radicals, existing bonds, and any lone
                    # pairs beyond the element's standard count
                    count = valence - radical - int(order) - 2 * (
                        atom.lone_pairs - PeriodicSystem.lone_pairs[atom.symbol])
                    for i in range(count):
                        a = Atom(element='H', radical_electrons=0, charge=0, label='', lone_pairs=0)
                        b = Bond(atom, a, 'S')
                        new_atoms.append(a)
                        # NOTE(review): uses `bonds` here while the bond-conversion loop
                        # above uses `edges` - presumably an alias; confirm.
                        atom.bonds[a] = b
                        a.bonds[atom] = b
                atoms.extend(new_atoms)

            # Calculate the multiplicity for the molecule and update the charges on each atom
            n_rad = 0  # total number of radical electrons
            for atom in atoms:
                atom.update_charge()
                n_rad += atom.radical_electrons
            # 2 s + 1, where s is the combined spin of unpaired electrons
            # (s = 1/2 per unpaired electron)
            multiplicity = n_rad + 1

        else:
            # Don't set a multiplicity for groups when converting from an old adjlist
            multiplicity = None

    except InvalidAdjacencyListError:
        # Log the offending adjacency list, then let the caller handle the error
        logging.error("Troublesome adjacency list:\n" + adjlist)
        raise

    return atoms, multiplicity
def test_oxygen_3_lone_pairs(self):
    """A lone oxygen atom carrying three lone pairs must be reported as unexpected."""
    oxygen = Atom(element='O', lone_pairs=3)
    self.assertTrue(_has_unexpected_lone_pairs(Molecule(atoms=[oxygen])))
def test_normal_oxygen(self):
    """A lone oxygen atom carrying two lone pairs must not be reported as unexpected."""
    oxygen = Atom(element='O', lone_pairs=2)
    self.assertFalse(_has_unexpected_lone_pairs(Molecule(atoms=[oxygen])))
def test_singlet_carbon(self):
    """A lone carbon atom carrying one lone pair must be reported as unexpected."""
    carbon = Atom(element='C', lone_pairs=1)
    self.assertTrue(_has_unexpected_lone_pairs(Molecule(atoms=[carbon])))
def generate_radicals(
    species: Type[Species],
    types: List[str],
    react_aromatic_rings: bool = False,
):
    """
    Enumerate the radicals obtained by removing one hydrogen atom from a species.

    The species is deep-copied and its resonance structures are generated. For every
    reactive structure, each hydrogen atom held by a single bond is cut off in turn; the
    remaining fragment gets the missing radical electron and is kept unless a previously
    kept fragment is isomorphic to it (resonance structures considered). The requested
    derivative types are then built from every kept fragment.

    Args:
        species (Species): The RMG Species instance to process. Its ``label`` is used as
            the prefix of every returned label, so it should not be empty.
        types (List[str]): Which derivatives to return. The entries checked for are
            ``'radical'``, ``'alkoxyl'`` and ``'peroxyl'``.
        react_aromatic_rings (bool, optional): Whether hydrogen atoms whose neighbor has
            a benzene bond are considered as well. Default: ``False``.

    Returns:
        List[Tuple[str, str]]: One ``(label, SMILES)`` tuple per generated derivative.
        An empty list is returned if ``species`` is ``None`` or its first molecule
        consists of a single atom.
    """

    def _with_resonance_structures(mol):
        # Wrap ``mol`` in a Species and generate its filtered resonance structures.
        spc = Species(molecule=[mol])
        spc.generate_resonance_structures(keep_isomorphic=False, filter_structures=True)
        return spc

    radicals = []
    existing_radical_indices = []
    relevant_radical_indices = []
    output = []
    if species is None or len(species.molecule[0].atoms) == 1:
        return radicals

    species = species.copy(deep=True)
    species.generate_resonance_structures(keep_isomorphic=False, filter_structures=True)

    # generate all normal "radicals", whether requested or not
    for molecule in species.molecule:
        if not molecule.reactive:
            continue
        existing_radical_indices = [
            molecule.atoms.index(atom) for atom in molecule.atoms if atom.radical_electrons
        ]
        for atom_1 in molecule.atoms:
            if not atom_1.is_hydrogen():
                continue

            # first neighbor attached through a single bond (skipping hydrogen bonds)
            atom_2 = next(
                (neighbor for neighbor, bond_12 in atom_1.edges.items() if bond_12.is_single()),
                None,
            )
            if atom_2 is None:
                continue
            if not react_aromatic_rings and any(
                    neighbor_bond.is_benzene() for neighbor_bond in atom_2.edges.values()):
                continue

            mol_copy = molecule.copy(deep=True)
            # We are about to change the connectivity of the atoms in the molecule,
            # which will invalidate any existing vertex connectivity information; thus we reset it.
            mol_copy.reset_connectivity_values()

            # get the corresponding bond_12 in mol_copy
            hydrogen_copy = mol_copy.atoms[molecule.atoms.index(atom_1)]
            bond_12_copy = next(
                (bond_copy for _, bond_copy in hydrogen_copy.edges.items() if bond_copy.is_single()),
                None,
            )
            if bond_12_copy is None:
                continue
            mol_copy.remove_bond(bond_12_copy)

            mol_splits = mol_copy.split()
            if len(mol_splits) != 2:
                # something went wrong, don't use these molecules
                continue
            mol_1, mol_2 = mol_splits
            # keep the fragment that is not the lone hydrogen atom
            derivative_mol = mol_2 if len(mol_2.atoms) != 1 else mol_1

            radicals_added = 0
            for atom in derivative_mol.atoms:
                theoretical_charge = (
                    elements.PeriodicSystem.valence_electrons[atom.symbol]
                    - atom.get_total_bond_order()
                    - atom.radical_electrons
                    - 2 * atom.lone_pairs
                )
                if theoretical_charge == atom.charge + 1:
                    # we're missing a radical electron on this atom
                    atom.increment_radical()
                    radicals_added += 1
            if radicals_added != 1:
                # something went wrong, don't use these molecules
                continue

            derivative_mol.update(raise_atomtype_exception=False)
            candidate = _with_resonance_structures(derivative_mol)
            if any(candidate.is_isomorphic(_with_resonance_structures(known)) for known in radicals):
                # an equivalent radical was already collected
                continue

            radicals.append(derivative_mol)
            index_shift = 1 if len(mol_1.atoms) == 1 else 0
            # positions of radical sites that were not radical sites in the parent structure
            new_sites = []
            for atom in derivative_mol.atoms:
                if not atom.radical_electrons:
                    continue
                position = derivative_mol.atoms.index(atom)
                if position + index_shift not in existing_radical_indices:
                    new_sites.append(position)
            relevant_radical_indices.append(new_sites[0])

    for i, (radical_mol, site) in enumerate(zip(radicals, relevant_radical_indices)):
        if 'radical' in types:
            radical_smiles = radical_mol.copy(deep=True).to_smiles()
            output.append((f'{species.label}_radical_{i}', radical_smiles))

        if 'alkoxyl' in types:
            # cap the radical site with an oxygen-centered radical: R-O.
            alkoxyl = radical_mol.copy(deep=True)
            oxygen = Atom(element='O', radical_electrons=1, charge=0, lone_pairs=2)
            alkoxyl.add_atom(oxygen)
            alkoxyl.atoms[site].decrement_radical()
            new_bond = Bond(atom1=alkoxyl.atoms[site], atom2=oxygen, order=1)
            alkoxyl.add_bond(new_bond)
            output.append((f'{species.label}_alkoxyl_{i}', alkoxyl.to_smiles()))

        if 'peroxyl' in types:
            # cap the radical site with two oxygens, the outer one a radical: R-O-O.
            peroxyl = radical_mol.copy(deep=True)
            oxygen_1 = Atom(element='O', radical_electrons=0, charge=0, lone_pairs=2)
            oxygen_2 = Atom(element='O', radical_electrons=1, charge=0, lone_pairs=2)
            peroxyl.add_atom(oxygen_1)
            peroxyl.add_atom(oxygen_2)
            peroxyl.atoms[site].decrement_radical()
            new_bond_1 = Bond(atom1=peroxyl.atoms[site], atom2=oxygen_1, order=1)
            new_bond_2 = Bond(atom1=oxygen_1, atom2=oxygen_2, order=1)
            peroxyl.add_bond(new_bond_1)
            peroxyl.add_bond(new_bond_2)
            output.append((f'{species.label}_peroxyl_{i}', peroxyl.to_smiles()))

    return output