def from_ob_mol(mol, obmol, raise_atomtype_exception=True):
    """
    Fill the molecular structure `mol` from the OpenBabel Mol object `obmol`.

    Uses `OpenBabel <http://openbabel.org/>`_ to perform the conversion.

    The vertices of `mol` are reset, ``obmol.AddHydrogens()`` is called on the
    OpenBabel molecule, and then one atom per OpenBabel atom and one bond per
    OpenBabel bond are added to `mol`. Finally the connectivity values, atom
    types, ring membership and multiplicity of `mol` are updated.

    Args:
        mol: The structure to populate; it is modified in place.
        obmol: The OpenBabel molecule to read atoms and bonds from.
        raise_atomtype_exception (bool): Passed to ``mol.update_atomtypes`` as
            its ``raise_exception`` argument.

    Returns:
        The `mol` object that was passed in.

    Raises:
        DependencyError: If the module-level ``openbabel`` name is ``None``.
    """
    # Below are the declared variables for cythonizing the module
    # cython.declare(i=cython.int)
    # cython.declare(radical_electrons=cython.int, charge=cython.int, lone_pairs=cython.int)
    # cython.declare(atom=mm.Atom, atom1=mm.Atom, atom2=mm.Atom, bond=mm.Bond)
    if openbabel is None:
        raise DependencyError('OpenBabel is not installed. Please install or use RDKit.')

    mol.vertices = []

    # Add hydrogen atoms to complete molecule if needed
    obmol.AddHydrogens()
    # TODO Chem.rdmolops.Kekulize(obmol, clearAromaticFlags=True)

    # First pass: one Atom per OpenBabel atom, appended in iteration order
    for obatom in openbabel.OBMolAtomIter(obmol):
        atomic_number = obatom.GetAtomicNum()
        isotope = obatom.GetIsotope()
        # A falsy isotope value (0) is passed on as -1
        element = elements.get_element(atomic_number, isotope or -1)

        charge = obatom.GetFormalCharge()
        spin_multiplicity = obatom.GetSpinMultiplicity()
        if spin_multiplicity == 0:
            radical_electrons = 0
        else:
            radical_electrons = spin_multiplicity - 1

        mol.vertices.append(mm.Atom(element, radical_electrons, charge, '', 0))

    # Second pass: one Bond per OpenBabel bond
    for obbond in openbabel.OBMolBondIter(obmol):
        oborder = obbond.GetBondOrder()
        if oborder not in (1, 2, 3, 4):
            # Only non-integer-like orders are checked for aromaticity
            if obbond.IsAromatic():
                oborder = 1.5

        # Subtract 1 from the OpenBabel indices: python array indices start at 0
        begin_atom = mol.vertices[obbond.GetBeginAtomIdx() - 1]
        end_atom = mol.vertices[obbond.GetEndAtomIdx() - 1]
        mol.add_bond(mm.Bond(begin_atom, end_atom, oborder))

    # Set atom types and connectivity values
    mol.update_connectivity_values()
    mol.update_atomtypes(log_species=True, raise_exception=raise_atomtype_exception)
    mol.update_multiplicity()
    mol.identify_ring_membership()

    # Assume this is always true
    # There are cases where 2 radical_electrons is a singlet, but
    # the triplet is often more stable,
    mol.multiplicity = mol.get_radical_count() + 1

    return mol
def update_xyz_string(self):
    """
    Build an XYZ-format string from ``self.conformer``.

    The first line holds the number of entries in
    ``self.conformer.number.value_si``, the second line holds ``self.label``,
    and each following line holds an element symbol (padded to 4 characters)
    followed by three coordinates printed with 8 decimals. Coordinates from
    ``self.conformer.coordinates.value_si`` are multiplied by 1e10 to convert
    m to Angstrom.

    Returns:
        str: 3D coordinates in an XYZ format. An empty string is returned when
        ``self.conformer`` or ``self.conformer.number`` is ``None``.
    """
    conformer = self.conformer
    if conformer is None or conformer.number is None:
        # Nothing to format
        return ''

    atomic_numbers = conformer.number.value_si
    xyz_lines = [str(len(atomic_numbers)), self.label]
    for atomic_number, position in zip(atomic_numbers, conformer.coordinates.value_si):
        symbol = get_element(int(atomic_number)).symbol
        angstroms = (position * 1e10).tolist()  # convert m to Angstrom
        xyz_lines.append('{0:4}'.format(symbol) + '{0:14.8f}{1:14.8f}{2:14.8f}'.format(*angstroms))
    return '\n'.join(xyz_lines)
def generate_isotopomers(spc, N=1):
    """
    Generate all isotopomers of the parameter species by adding max. N carbon
    isotopes to the atoms of the species.

    The first molecule of `spc` is handed to ``add_isotope`` together with the
    element returned by ``get_element(6, 13)``. Every labeled molecule that
    comes back is wrapped in a new ``Species`` (with a deep copy of
    ``spc.thermo`` and the same ``transport_data`` and ``reactive`` values) and
    its resonance structures are generated. Species that are isomorphic to one
    already kept are dropped. When ``spc.thermo`` is truthy,
    ``correct_entropy`` is applied to every remaining isotopomer.

    Returns:
        list: The unique isotopomer ``Species`` objects.
    """
    base_mol = spc.molecule[0]
    isotope = get_element(6, 13)

    labeled_mols = []
    add_isotope(0, N, base_mol, labeled_mols, isotope)

    candidates = []
    for labeled_mol in labeled_mols:
        new_spc = Species(
            molecule=[labeled_mol],
            thermo=deepcopy(spc.thermo),
            transport_data=spc.transport_data,
            reactive=spc.reactive,
        )
        new_spc.generate_resonance_structures(keep_isomorphic=True)
        candidates.append(new_spc)

    # Do not retain identical species. Candidates are visited last-to-first.
    unique_isotopomers = []
    for candidate in reversed(candidates):
        if not any(kept.is_isomorphic(candidate) for kept in unique_isotopomers):
            unique_isotopomers.append(candidate)

    if spc.thermo:
        for isotopomer in unique_isotopomers:
            correct_entropy(isotopomer, spc)

    return unique_isotopomers
def determine_symmetry(xyz: dict) -> Tuple[int, int]:
    """
    Determine external symmetry and chirality (optical isomers) of the species.

    The atomic numbers and coordinates are packed into a ``QMData`` object and
    passed to a ``PointGroupCalculator`` that writes its files into the
    ``scratch`` directory under ``arc_path``. The directory is created if it
    does not exist yet.

    Args:
        xyz (dict): The 3D coordinates. Its ``'symbols'`` and ``'coords'``
            entries are read.

    Returns: Tuple[int, int]
        - The external symmetry number.
        - ``1`` if no chiral centers are present, ``2`` if chiral centers are present.

        Both values are ``1`` when the point group calculation returns ``None``.
    """
    # List of atomic numbers, in the same order as the symbols
    atom_numbers = [get_element(symbol).number for symbol in xyz['symbols']]
    # coords is an N x 3 numpy.ndarray of atomic coordinates in the same order as `atom_numbers`
    coords = np.array(xyz['coords'], np.float64)
    unique_id = '0'  # Just some name that the SYMMETRY code gives to one of its jobs
    scr_dir = os.path.join(arc_path, 'scratch')  # Scratch directory that the SYMMETRY code writes its files in
    # exist_ok avoids the check-then-create race of testing os.path.exists first:
    # a concurrent caller creating the directory in between no longer raises.
    os.makedirs(scr_dir, exist_ok=True)

    qmdata = QMData(
        groundStateDegeneracy=1,  # Only needed to check if valid QMData
        numberOfAtoms=len(atom_numbers),
        atomicNumbers=atom_numbers,
        atomCoords=(coords, 'angstrom'),
        energy=(0.0, 'kcal/mol')  # Only needed to avoid error
    )
    # Minimal ad-hoc settings object exposing the two attributes given below
    symmetry_settings = type('', (), dict(symmetryPath='symmetry', scratchDirectory=scr_dir))()
    pgc = PointGroupCalculator(symmetry_settings, unique_id, qmdata)
    pg = pgc.calculate()
    if pg is None:
        # No point group was determined: fall back to the defaults
        return 1, 1
    return pg.symmetry_number, 2 if pg.chiral else 1
def is_enriched(obj):
    """
    Return True if the species or reaction object has any enriched isotopes.

    For a ``Species`` only the atoms of its first molecule are inspected: an
    atom counts as enriched when its element's isotope is not -1 and its mass
    is not close (``np.allclose``) to the mass of the element returned by
    ``get_element`` for the same symbol. For a ``Reaction`` every reactant and
    every product is checked and the result is True if any of them is enriched.

    Raises:
        TypeError: If `obj` is neither a ``Species`` nor a ``Reaction``.
    """
    if isinstance(obj, Species):
        for atom in obj.molecule[0].atoms:
            element = atom.element
            if element.isotope == -1:
                continue
            if not np.allclose(element.mass, get_element(element.symbol).mass):
                return True
        return False

    if isinstance(obj, Reaction):
        # All reactants and products are evaluated before combining the flags
        flags = [is_enriched(spec) for spec in obj.reactants]
        flags.extend([is_enriched(spec) for spec in obj.products])
        return any(flags)

    raise TypeError(
        'is_enriched only takes species and reaction objects. {} was sent'.format(str(type(obj))))
def remove_isotope(labeled_obj, inplace=False):
    """
    Replace the isotope-labeled elements of a Species, Reaction or Molecule by
    the element that ``get_element`` returns for the same symbol.

    An atom is considered labeled when ``atom.element.isotope != -1``.

    If `inplace` is falsy, a stripped copy is returned:

    * Species: a deep copy whose first molecule is stripped; only that
      molecule is kept and the other resonance isomers are regenerated.
    * Reaction: ``labeled_obj.copy()`` with its reactants and products
      replaced by their stripped versions.
    * Molecule: a deep copy with all of its atoms stripped.

    If `inplace` is truthy, the atoms of the object itself are modified (for a
    Species, in all of its molecules) and a list of ``(atom, element)`` pairs
    is returned, holding each modified atom and the element it had before, for
    adding back to the original object.

    Raises:
        TypeError: If `labeled_obj` is not a Reaction, Species or Molecule.
    """

    def strip_atoms(atoms, record=None):
        """Unlabel `atoms`, appending (atom, old element) pairs to `record` if given."""
        for atom in atoms:
            if atom.element.isotope != -1:
                if record is not None:
                    record.append((atom, atom.element))
                atom.element = get_element(atom.element.symbol)

    if isinstance(labeled_obj, Species):
        if inplace:
            modified_atoms = []
            for mol in labeled_obj.molecule:
                strip_atoms(mol.atoms, modified_atoms)
            return modified_atoms
        stripped = labeled_obj.copy(deep=True)
        strip_atoms(stripped.molecule[0].atoms)
        # only do it for the first molecule, generate the other resonance isomers.
        stripped.molecule = [stripped.molecule[0]]
        stripped.generate_resonance_structures(keep_isomorphic=True)
        return stripped

    if isinstance(labeled_obj, Reaction):
        if inplace:
            atom_list = []
            for reactant in labeled_obj.reactants:
                atom_list.extend(remove_isotope(reactant, inplace) or [])
            for product in labeled_obj.products:
                atom_list.extend(remove_isotope(product, inplace) or [])
            return atom_list
        stripped_rxn = labeled_obj.copy()
        stripped_rxn.reactants = [remove_isotope(reactant, inplace) for reactant in stripped_rxn.reactants]
        stripped_rxn.products = [remove_isotope(product, inplace) for product in stripped_rxn.products]
        return stripped_rxn

    if isinstance(labeled_obj, Molecule):
        if inplace:
            modified_atoms = []
            strip_atoms(labeled_obj.atoms, modified_atoms)
            return modified_atoms
        stripped = labeled_obj.copy(deep=True)
        strip_atoms(stripped.atoms)
        return stripped

    raise TypeError('Only Reaction, Species, and Molecule objects are supported')
def from_rdkit_mol(mol, rdkitmol):
    """
    Fill the molecular structure `mol` from the RDKit Mol object `rdkitmol`.

    Uses `RDKit <http://rdkit.org/>`_ to perform the conversion.
    This Kekulizes everything, removing all aromatic atom types.

    The vertices of `mol` are reset. Hydrogens are added to a copy of
    `rdkitmol` returned by ``Chem.AddHs`` and that copy is Kekulized before its
    atoms and bonds are transferred. Afterwards the lone pairs of `mol` are
    updated, ``mol.update()`` is called and the multiplicity is set to the
    radical count plus one.

    Returns:
        The `mol` object that was passed in.
    """
    cython.declare(i=cython.int,
                   radical_electrons=cython.int,
                   charge=cython.int,
                   lone_pairs=cython.int,
                   number=cython.int,
                   order=cython.float,
                   atom=mm.Atom,
                   atom1=mm.Atom,
                   atom2=mm.Atom,
                   bond=mm.Bond)

    # Bond order for each recognized RDKit bond type name; any other name gives order 0
    bond_orders = {
        'SINGLE': 1,
        'DOUBLE': 2,
        'TRIPLE': 3,
        'QUADRUPLE': 4,
        'AROMATIC': 1.5,
    }

    mol.vertices = []

    # Add hydrogen atoms to complete molecule if needed
    rdkitmol.UpdatePropertyCache(strict=False)
    rdkitmol = Chem.AddHs(rdkitmol)
    Chem.rdmolops.Kekulize(rdkitmol, clearAromaticFlags=True)

    for i in range(rdkitmol.GetNumAtoms()):
        rdkitatom = rdkitmol.GetAtomWithIdx(i)

        # Use atomic number as key for element; a falsy isotope (0) is passed on as -1
        number = rdkitatom.GetAtomicNum()
        isotope = rdkitatom.GetIsotope()
        element = elements.get_element(number, isotope or -1)

        charge = rdkitatom.GetFormalCharge()
        radical_electrons = rdkitatom.GetNumRadicalElectrons()

        atom = mm.Atom(element, radical_electrons, charge, '', 0)
        mol.vertices.append(atom)

        # Connect the new atom to every previously added atom it is bonded to
        for j in range(i):
            rdkitbond = rdkitmol.GetBondBetweenAtoms(i, j)
            if rdkitbond is None:
                continue
            order = bond_orders.get(rdkitbond.GetBondType().name, 0)
            bond = mm.Bond(mol.vertices[i], mol.vertices[j], order)
            mol.add_bond(bond)

    # We need to update lone pairs first because the charge was set by RDKit
    mol.update_lone_pairs()
    # Set atom types and connectivity values
    mol.update()

    # Assume this is always true
    # There are cases where 2 radical_electrons is a singlet, but
    # the triplet is often more stable,
    mol.multiplicity = mol.get_radical_count() + 1
    # mol.update_atomtypes()

    return mol
def from_adjacency_list(adjlist, group=False, saturate_h=False):
    """
    Convert a string adjacency list `adjlist` into a set of :class:`Atom` and
    :class:`Bond` objects.

    The text may start with an optional one-word label line and an optional
    ``multiplicity`` line. Every remaining non-blank line describes one atom:
    an integer index, an optional ``*``-prefixed label, the atom type (or a
    ``[...]`` list of types for groups), the unpaired electrons (``u``),
    and then optionally lone pairs (``p``), charge (``c``), isotope (``i``),
    ring membership (``r``) and a list of ``{index,order}`` bonds.

    If the last line matches ``re_intermediate_adjlist`` or ``re_old_adjlist``
    the whole text is handed to ``from_old_adjacency_list`` instead and its
    result is returned unchanged.

    Args:
        adjlist (str): The adjacency list text.
        group (bool): If true, ``GroupAtom``/``GroupBond`` objects are built
            and lists/wildcards are allowed; otherwise ``Atom``/``Bond``
            objects are built and the molecule consistency checks are run.
        saturate_h (bool): If true (and `group` is false), ``Saturator.saturate``
            is called on the atoms after the bonds are created.

    Returns:
        tuple: ``(atoms, multiplicity)``. For groups ``multiplicity`` is a list
        of ints or ``None``; for molecules it is the int read from the
        multiplicity line or, if that line is absent, a value computed from
        the number of radical electrons.

    Raises:
        InvalidAdjacencyListError: If the text is empty, has no atom lines, or
            contains one of the syntax or consistency problems tested below.
    """
    atoms = []
    atom_dict = {}  # atom index from the text -> atom object
    bonds = {}  # atom index -> {neighbor index: list of bond order strings}
    multiplicity = None

    adjlist = adjlist.strip()
    lines = adjlist.splitlines()
    if adjlist == '' or len(lines) == 0:
        raise InvalidAdjacencyListError('Empty adjacency list.')

    # Detect old-style adjacency lists by looking at the last line's syntax
    last_line = lines[-1].strip()
    while not last_line:  # Remove any empty lines from the end
        lines.pop()
        last_line = lines[-1].strip()
    if re_intermediate_adjlist.match(last_line):
        logging.debug(
            "adjacency list:\n{1}\nline '{0}' looks like an intermediate style "
            "adjacency list".format(last_line, adjlist))
        return from_old_adjacency_list(adjlist, group=group, saturate_h=saturate_h)
    if re_old_adjlist.match(last_line):
        logging.debug(
            "Adjacency list:\n{1}\nline '{0}' looks like an old style adjacency list"
            .format(last_line, adjlist))
        if not group:
            logging.debug("Will assume implicit H atoms")
        # For old-style lists the caller's saturate_h is ignored: hydrogens are
        # saturated exactly when a molecule (not a group) is being read.
        return from_old_adjacency_list(adjlist, group=group, saturate_h=(not group))

    # Interpret the first line if it contains a label
    # (a line made of a single whitespace-delimited token)
    if len(lines[0].split()) == 1:
        # The label line is only removed; its text is not used further in this
        # function (`label` is reassigned for every atom line below).
        label = lines.pop(0)
        if len(lines) == 0:
            raise InvalidAdjacencyListError(
                'No atoms specified in adjacency list.')

    # Interpret the second line if it contains a multiplicity
    # NOTE(review): if the line after the label is blank, split() returns []
    # and the [0] index raises IndexError rather than InvalidAdjacencyListError.
    if lines[0].split()[0] == 'multiplicity':
        line = lines.pop(0)
        if group:
            match = re.match(
                r'\s*multiplicity\s+\[\s*(\d(?:,\s*\d)*)\s*\]\s*$', line)
            if not match:
                # Not a list: only the wildcard form is accepted, which leaves
                # `multiplicity` as None.
                rematch = re.match(r'\s*multiplicity\s+x\s*$', line)
                if not rematch:
                    raise InvalidAdjacencyListError(
                        "Invalid multiplicity line '{0}'. Should be a list like "
                        "'multiplicity [1,2,3]' or a wildcard 'multiplicity x'"
                        .format(line))
            else:
                # should match "multiplicity [1]" or " multiplicity   [ 1, 2, 3 ]" or " multiplicity [1,2,3]"
                # and whatever's inside the [] (excluding leading and trailing spaces) should be captured as group 1.
                # If a wildcard is desired, this line can be omitted or replaced with 'multiplicity x'
                # Multiplicities must be only one digit (i.e. less than 10)
                # The (?:,\s*\d)* matches patterns like ", 2" 0 or more times, but doesn't capture them (because of the leading ?:)
                multiplicities = match.group(1).split(',')
                multiplicity = [int(i) for i in multiplicities]
        else:
            match = re.match(r'\s*multiplicity\s+\d+\s*$', line)
            if not match:
                raise InvalidAdjacencyListError(
                    "Invalid multiplicity line '{0}'. Should be an integer like "
                    "'multiplicity 2'".format(line))
            multiplicity = int(line.split()[1])
        if len(lines) == 0:
            raise InvalidAdjacencyListError(
                'No atoms specified in adjacency list: \n{0}'.format(adjlist))

    # Matches a {...} bond entry that contains whitespace between its braces
    mistake1 = re.compile(r'\{[^}]*\s+[^}]*\}')
    # Iterate over the remaining lines, generating Atom or GroupAtom objects
    for line in lines:

        # Sometimes people put spaces after commas, which messes up the
        # parse-by-whitespace. Examples include '[Cd, Ct]'.
        # (The pattern above only detects whitespace inside braces.)
        if mistake1.search(line):
            raise InvalidAdjacencyListError(
                "{1} Shouldn't have spaces inside braces:\n{0}".format(
                    mistake1.search(line).group(), adjlist))

        # Sometimes commas are used to delimit bonds in the bond list,
        # so replace them just in case
        line = line.replace('},{', '} {')

        data = line.split()

        # Skip if blank line
        if len(data) == 0:
            continue

        # First item is index for atom
        # Sometimes these have a trailing period (as if in a numbered list),
        # so remove it just in case
        aid = int(data[0].strip('.'))

        # If second item starts with '*', then atom is labeled
        # NOTE(review): a line holding only an index has no data[1], so this
        # raises IndexError instead of InvalidAdjacencyListError.
        label = ''
        index = 1  # position in `data` of the next field to interpret
        if data[1][0] == '*':
            label = data[1]
            index += 1

        # Next is the element or functional group element
        # A list can be specified with the [,] syntax (groups only)
        atom_type = data[index]
        if atom_type[0] == '[':
            if not group:
                raise InvalidAdjacencyListError(
                    "Error on:\n{0}\nA molecule should not assign more than one "
                    "atomtype per atom.".format(adjlist))
            atom_type = atom_type[1:-1].split(',')
        else:
            atom_type = [atom_type]
        index += 1

        # Next the number of unpaired electrons (mandatory 'u' field)
        unpaired_electrons = []
        u_state = data[index]
        if u_state[0] == 'u':
            if u_state[1] == '[':
                u_state = u_state[2:-1].split(',')
            else:
                # Only the single character directly after 'u' is read
                u_state = [u_state[1]]
            for u in u_state:
                if u == '0':
                    unpaired_electrons.append(0)
                elif u == '1':
                    unpaired_electrons.append(1)
                elif u == '2':
                    unpaired_electrons.append(2)
                elif u == '3':
                    unpaired_electrons.append(3)
                elif u == '4':
                    unpaired_electrons.append(4)
                elif u == 'x':
                    # Wildcard: allowed for groups only, and nothing is appended
                    if not group:
                        raise InvalidAdjacencyListError(
                            "Error on:\n{0}\nA molecule should not assign a wildcard to "
                            "number of unpaired electrons.".format(adjlist))
                else:
                    raise InvalidAdjacencyListError(
                        'Number of unpaired electrons not recognized on\n{0}.'.
                        format(adjlist))
            index += 1
        else:
            raise InvalidAdjacencyListError(
                'Number of unpaired electrons not defined on\n{0}.'.format(
                    adjlist))

        # Next the number of lone electron pairs (if provided)
        # For molecules a missing 'p' field defaults to 0; for groups the list stays empty.
        lone_pairs = []
        if len(data) > index:
            lp_state = data[index]
            if lp_state[0] == 'p':
                if lp_state[1] == '[':
                    lp_state = lp_state[2:-1].split(',')
                else:
                    # Only the single character directly after 'p' is read
                    lp_state = [lp_state[1]]
                for lp in lp_state:
                    if lp == '0':
                        lone_pairs.append(0)
                    elif lp == '1':
                        lone_pairs.append(1)
                    elif lp == '2':
                        lone_pairs.append(2)
                    elif lp == '3':
                        lone_pairs.append(3)
                    elif lp == '4':
                        lone_pairs.append(4)
                    elif lp == 'x':
                        # Wildcard: allowed for groups only, and nothing is appended
                        if not group:
                            raise InvalidAdjacencyListError(
                                "Error in adjacency list:\n{0}\nA molecule should not have "
                                "a wildcard assigned to number of lone pairs.".
                                format(adjlist))
                    else:
                        raise InvalidAdjacencyListError(
                            'Error in adjacency list:\n{0}\nNumber of lone electron pairs '
                            'not recognized.'.format(adjlist))
                index += 1
            else:
                if not group:
                    lone_pairs.append(0)
        else:
            if not group:
                lone_pairs.append(0)

        # Next the number of partial charges (if provided)
        # For molecules a missing 'c' field defaults to 0; for groups the list stays empty.
        partial_charges = []
        if len(data) > index:
            e_state = data[index]
            if e_state[0] == 'c':
                if e_state[1] == '[':
                    e_state = e_state[2:-1].split(',')
                else:
                    # Everything after 'c' is kept so that signed values like '+1' survive
                    e_state = [e_state[1:]]
                for e in e_state:
                    if e == '0':
                        partial_charges.append(0)
                    elif e == '+1':
                        partial_charges.append(1)
                    elif e == '+2':
                        partial_charges.append(2)
                    elif e == '+3':
                        partial_charges.append(3)
                    elif e == '+4':
                        partial_charges.append(4)
                    elif e == '-1':
                        partial_charges.append(-1)
                    elif e == '-2':
                        partial_charges.append(-2)
                    elif e == '-3':
                        partial_charges.append(-3)
                    elif e == '-4':
                        partial_charges.append(-4)
                    elif e == 'x':
                        # Wildcard: allowed for groups only, and nothing is appended
                        if not group:
                            raise InvalidAdjacencyListError(
                                "Error on adjacency list:\n{0}\nA molecule should not have "
                                "a wildcard assigned to number of charges.".
                                format(adjlist))
                    else:
                        raise InvalidAdjacencyListError(
                            'Error on adjacency list:\n{0}\nNumber of partial charges '
                            'not recognized.'.format(adjlist))
                index += 1
            else:
                if not group:
                    partial_charges.append(0)
        else:
            if not group:
                partial_charges.append(0)

        # Next the isotope (if provided); -1 means no isotope was given
        isotope = -1
        if len(data) > index:
            i_state = data[index]
            if i_state[0] == 'i':
                isotope = int(i_state[1:])
                index += 1

        # Next ring membership info (if provided)
        # Only the single character after 'r' is read and converted to a bool.
        props = {}
        if len(data) > index:
            r_state = data[index]
            if r_state[0] == 'r':
                props['inRing'] = bool(int(r_state[1]))
                index += 1

        # Create a new atom based on the above information
        if group:
            atom = GroupAtom(atom_type, unpaired_electrons, partial_charges, label, lone_pairs, props)
        else:
            # Molecules use the first (only) entry of each list; `props` is not passed on
            atom = Atom(atom_type[0], unpaired_electrons[0], partial_charges[0], label, lone_pairs[0])
            if isotope != -1:
                atom.element = get_element(atom.number, isotope)

        # Add the atom to the list
        atoms.append(atom)
        atom_dict[aid] = atom

        # Process list of bonds: every remaining token is treated as a bond entry
        bonds[aid] = {}
        for datum in data[index:]:

            # Sometimes commas are used to delimit bonds in the bond list,
            # so strip them just in case
            datum = datum.strip(',')

            # Drop the first and last character (the braces) and split
            # "index,order" at the first comma
            aid2, comma, order = datum[1:-1].partition(',')
            aid2 = int(aid2)
            if aid == aid2:
                raise InvalidAdjacencyListError(
                    'Error in adjacency list:\n{1}\nAttempted to create a bond between '
                    'atom {0:d} and itself.'.format(aid, adjlist))

            # NOTE(review): an entry without a comma leaves `order` empty, so
            # order[0] raises IndexError.
            if order[0] == '[':
                order = order[1:-1].split(',')
            else:
                order = [order]

            bonds[aid][aid2] = order

    # Check consistency using bonddict: every bond must be listed on both
    # atoms, with identical orders
    for atom1 in bonds:
        for atom2 in bonds[atom1]:
            if atom2 not in bonds:
                raise InvalidAdjacencyListError(
                    'Error in adjacency list:\n{1}\nAtom {0:d} not in bond '
                    'dictionary.'.format(atom2, adjlist))
            elif atom1 not in bonds[atom2]:
                raise InvalidAdjacencyListError(
                    'Error in adjacency list:\n{2}\nFound bond between {0:d} and {1:d}, '
                    'but not the reverse.'.format(atom1, atom2, adjlist))
            elif bonds[atom1][atom2] != bonds[atom2][atom1]:
                raise InvalidAdjacencyListError(
                    'Error in adjacency list:\n{4}\nFound bonds between {0:d} and {1:d}, but of different orders '
                    '"{2}" and "{3}".'.format(atom1, atom2, bonds[atom1][atom2], bonds[atom2][atom1], adjlist))

    # Convert bonddict to use Atom[group] and Bond[group] objects
    # Indices are visited in sorted order and each pair is handled once (aid1 < aid2).
    atomkeys = list(atom_dict.keys())
    atomkeys.sort()
    for aid1 in atomkeys:
        atomkeys2 = list(bonds[aid1].keys())
        atomkeys2.sort()
        for aid2 in atomkeys2:
            if aid1 < aid2:
                atom1 = atom_dict[aid1]
                atom2 = atom_dict[aid2]
                order = bonds[aid1][aid2]
                if group:
                    bond = GroupBond(atom1, atom2, order)
                elif len(order) == 1:
                    bond = Bond(atom1, atom2, order[0])
                else:
                    raise InvalidAdjacencyListError(
                        'Error in adjacency list:\n{0}\nMultiple bond orders specified for '
                        'an atom in a Molecule.'.format(adjlist))
                # Register the same bond object on both atoms
                atom1.edges[atom2] = bond
                atom2.edges[atom1] = bond

    if saturate_h:
        # Add explicit hydrogen atoms to complete structure if desired
        if not group:
            Saturator.saturate(atoms)

    # Consistency checks
    if not group:
        # Molecule consistency check
        # Electron and valency consistency check for each atom
        for atom in atoms:
            ConsistencyChecker.check_partial_charge(atom)

        n_rad = sum([atom.radical_electrons for atom in atoms])
        absolute_spin_per_electron = 1 / 2.
        if multiplicity is None:
            # No multiplicity line was given: derive it from the radical count.
            # The arithmetic uses a float factor, so the result is a float.
            multiplicity = 2 * (n_rad * absolute_spin_per_electron) + 1

        ConsistencyChecker.check_multiplicity(n_rad, multiplicity)
        for atom in atoms:
            ConsistencyChecker.check_hund_rule(atom, multiplicity)
        return atoms, multiplicity
    else:
        # Currently no group consistency check
        return atoms, multiplicity