def do_closure(self, text):
    """Open or close the ring-closure label given by ``text``.

    The label is normalised with ``normalize_closure``.  The first time a
    label is seen, the current atom (``self._prev_atoms[-1]``) and the
    pending bond are stored in ``self.closures``.  The second time, the
    stored atom is bonded to the current atom and the label is released.
    In both cases the pending bond is reset to ``implicit_bond``.

    Raises:
        AssertionError: if the explicit bond symbols written at the two ends
            of the closure do not agree, or if the closure would bond an
            atom to itself.
    """
    num = normalize_closure(text)

    # ``in`` / ``pop`` instead of dict.has_key(), which Python 3 removed.
    if num in self.closures:
        prev_atom, bond = self.closures.pop(num)

        pending = self._pending_bond
        assert pending is not None, "Can't happen"

        if (pending is not implicit_bond
                and bond is not implicit_bond
                and pending.symbol != "-"):  # according to toolkit
            # Both ends carry an explicit bond, so they have to be
            # compatible: identical, or "/" at one end and "\" at the other.
            prev_symbol = bond.symbol
            symbol = pending.symbol
            compatible = (
                prev_symbol == symbol
                or (prev_symbol == "/" and symbol == "\\")
                or (prev_symbol == "\\" and symbol == "/")
            )
            if not compatible:
                raise AssertionError("bond types don't match")
        elif bond is implicit_bond:
            if pending is not implicit_bond:
                # only the closing end is explicit, so keep that one
                bond = pending
            else:
                # both are implicit so make a new one
                bond = Bond()

        bond._closure = 1
        atom = self._prev_atoms[-1]
        if prev_atom is atom:
            raise AssertionError("cannot close a ring with itself")
        prev_atom._closure = 1
        atom._closure = 1

        # Wire the bond and both atoms together by hand (this replaces the
        # disabled self.mol.add_bond(bond, prev_atom, atom) call).
        bond.atoms = [prev_atom, atom]
        atom.bonds.append(bond)
        prev_atom.bonds.append(bond)
        atom.oatoms.append(prev_atom)
        prev_atom.oatoms.append(atom)
        self.bonds.append(bond)

        if self.vfgraph:
            # the edge is inserted once per direction
            index1, index2 = prev_atom.index, atom.index
            insert_edge = self.insert_edge
            insert_edge(index1, index2, bond)
            insert_edge(index2, index1, bond)
    else:
        # First occurrence of this label: remember where the ring opens.
        self.closures[num] = (self._prev_atoms[-1], self._pending_bond)

    self._pending_bond = implicit_bond
def do_bond(self, text):
    """Turn the bond token ``text`` into the pending bond.

    The token is looked up in ``BONDLOOKUP`` and a ``Bond`` built from the
    result is stored in ``self._pending_bond``.  A pending bond must not
    already be set (only ``implicit_bond`` or ``None`` is accepted).
    """
    assert self._pending_bond in (implicit_bond, None)

    lookup = BONDLOOKUP[text]
    symbol, bondorder, bondtype, equiv_class, stereo = lookup

    if bondtype != 4:
        fixed = 1
    else:
        # bond type 4 is the aromatic ':' bond, which must never be
        # written explicitly
        assert 0, "Bond's shouldn't come in as ':'"
        fixed = 0

    pending = Bond(text, bondorder, bondtype, fixed, stereo)
    pending.equiv_class = equiv_class
    self._pending_bond = pending
def addHydrogens(mol):
    """Make the implicit hydrogens of ``mol`` explicit, in place.

    For every atom, ``atom.hcount`` new ``Atom`` objects with symbol 'H' are
    added to the molecule and bonded to that atom with a default ``Bond``.
    Afterwards the atoms are re-indexed and ``mol.explicitHydrogens`` is set
    to 1.  Nothing happens if ``mol.explicitHydrogens`` is already true.

    The new hydrogens get no coordinates, and the ``hcount`` of the parent
    atom is left untouched.

    Returns:
        The same ``mol`` object, for convenience.
    """
    if mol.explicitHydrogens:
        return mol

    # Iterate over a snapshot: mol.add_atom() is called inside the loop and
    # presumably appends to mol.atoms, so iterating the live list would also
    # walk over the hydrogens that are being added.
    for atom in list(mol.atoms):
        for _ in range(atom.hcount):
            hatom = Atom()
            hatom.symbol = 'H'
            mol.add_atom(hatom)
            mol.add_bond(Bond(), atom, hatom)

    # reset atom indices
    for index, atom in enumerate(mol.atoms):
        atom.index = index

    # change flag to let know that hydrogens are explicit in this mol
    mol.explicitHydrogens = 1
    return mol
def add_atom(self, atom):
    """Append ``atom`` to the structure and bond it to the previous atom.

    The atom receives the next free index and, when ``self.vfgraph`` is set,
    is also inserted as a graph node.  Unless the pending bond is ``None``,
    the atom is connected to ``self._prev_atoms[-1]`` through the pending
    bond (a fresh ``Bond()`` when the pending bond is the implicit one).
    Finally the pending bond is reset to ``implicit_bond`` and ``atom``
    becomes the current previous atom.
    """
    atom_list = self.atoms
    atom.index = len(atom_list)
    atom_list.append(atom)
    if self.vfgraph:
        self.insert_node(atom)

    if self._pending_bond == implicit_bond:
        # Implicit single or aromatic bond
        self._pending_bond = Bond()

    new_bond = self._pending_bond
    if new_bond is not None:
        previous = self._prev_atoms[-1]
        new_bond.atoms[:] = [previous, atom]

        # Wire bond and atoms together by hand (this replaces the disabled
        # self.mol.add_bond(bond, prev_atom, atom) call).
        new_bond.atoms = [previous, atom]
        atom.bonds.append(new_bond)
        previous.bonds.append(new_bond)
        atom.oatoms.append(previous)
        previous.oatoms.append(atom)
        self.bonds.append(new_bond)

        if self.vfgraph:
            # the edge is inserted once per direction
            first, second = previous.index, atom.index
            add_edge = self.insert_edge
            add_edge(first, second, new_bond)
            add_edge(second, first, new_bond)

    self._pending_bond = implicit_bond

    if self._prev_atoms:
        self._prev_atoms[-1] = atom
    else:
        self._prev_atoms.append(atom)
def reader(file, stripHydrogens=1):
    """Generate the molecules stored in an MDL SD file.

    Parameters:
        file: handed unchanged to ``collector``, which supplies the lines
            through ``next()`` and the text consumed so far through
            ``dump()``.
        stripHydrogens: when true, atoms whose symbol is "H" are not turned
            into ``Atom`` objects; a bond to such an atom increments the
            ``explicit_hcount`` of the atom at the other end instead.

    Yields:
        One 3-tuple per record:
          (mol, text, None)    for a record that was parsed, and
          (None, text, error)  for a record that failed, where ``error`` is
                               the formatted traceback of the failure.
        ``text`` is whatever ``lines.dump()`` returns.

    A record that raises StopIteration while it is being read (end of input)
    ends the generator without being yielded.  If the input ends while a
    failed record is being skipped, the error tuple is still yielded before
    the generator finishes.
    """
    lines = collector(file)
    while 1:
        try:
            fields = {}

            # ---- header block ------------------------------------------
            name = lines.next().strip()
            lines.next()    # user line, not used
            lines.next()    # comment line, not used
            molinfo = lines.next()
            numAtoms, numBonds = int(molinfo[0:3]), int(molinfo[3:6])

            # ---- atom block --------------------------------------------
            atoms = []      # one entry per atom line; None for a stripped H
            _atoms = []     # only the atoms that end up in the molecule
            for index in range(numAtoms):
                line = lines.next()
                (x, y, z, symbol, mass, charge,
                 stereo, hcount, hcount_fixed) = parse_atom(line)

                if symbol == "H" and stripHydrogens:
                    atoms.append(None)
                    continue

                atom = Atom()
                atoms.append(atom)
                _atoms.append(atom)
                atom.set_symbol(symbol)
                atom.explicit_hcount = hcount
                atom.charge = charge
                atom._line = line
                atom.x = x
                atom.y = y
                atom.z = z
                if hcount_fixed:
                    print("hcount fixed")
                    atom.fixed_hcount = 1  # oops, we shouldn't use this
                    atom.has_explicit_hcount = True
                if mass:
                    atom.weight = atom.mass + mass
                # position among the kept atoms, not in the file
                atom.index = len(_atoms) - 1

            # ---- bond block --------------------------------------------
            bonds = []
            for index in range(numBonds):
                line = lines.next()
                a1, a2, bondtype, stereo, remainder = parse_bond(line)
                symbol, bondorder, bondtype, fixed = BOND_SYMBOL[bondtype]
                atom1, atom2 = atoms[a1], atoms[a2]

                if atom1 is None or atom2 is None:
                    # Bond to a stripped hydrogen: only count it on the
                    # surviving atom (if there is one).
                    if atom1 is None and atom2 is not None:
                        atom2.explicit_hcount += 1
                    elif atom2 is None and atom1 is not None:
                        atom1.explicit_hcount += 1
                    continue

                bond = Bond(symbol, bondorder, bondtype, fixed)
                bonds.append(bond)
                bond._line = remainder
                bond.index = index
                bond.atoms = [atom1, atom2]
                try:
                    bond.stereo = BOND_LOOKUP_STEREO[bondtype-1][stereo]
                except KeyError:
                    raise MolReaderError("An SD record cannot have a bondtype of %s and a stereo value of %s"%(bondtype, stereo))
                except IndexError:
                    # debugging aid, then let the error propagate
                    print("*" * 44)
                    print(line)
                    print("bondtype, stereo %s %s" % (bondtype, stereo))
                    raise

                atom1.bonds.append(bond)
                atom2.bonds.append(bond)
                atom1.oatoms.append(atom2)
                atom2.oatoms.append(atom1)
                if atom1.symbol == "H":
                    atom2.explicit_hcount += 1
                if atom2.symbol == "H":
                    atom1.explicit_hcount += 1

            ##############################################################
            # read the mprops if necessary
            # Property lines are tagged in their first six columns
            # ("M  END", "M  CHG": an M, two blanks and a three letter code).
            line = lines.next().strip()
            while 1:
                tag = line[0:6]
                if tag == "M  END":
                    line = lines.next().strip()
                    break
                if tag == "M  CHG":
                    # "M  CHG  n  atom charge  atom charge ...": drop the
                    # count and walk the (atom number, charge) pairs.
                    groups = line[6:].split()[1:]
                    for pos in range(0, len(groups), 2):
                        target = atoms[int(groups[pos]) - 1]
                        charge = int(groups[pos + 1])
                        if target is not None:  # None: stripped hydrogen
                            target.charge = charge
                elif line[0:1] == ">" or line[0:4] == "$$$$":
                    # no "M  END": the data items / end of record start here
                    break
                line = lines.next().strip()

            #############################################################
            # read the fields if necessary
            while line != "$$$$":
                if line[0:1] == ">":
                    res = FIELDPATTERN.match(line)
                    if res:
                        field, potentialID = res.groups()
                    else:
                        res = ALTFIELDPATTERN.match(line)
                        if res:
                            field, potentialID = res.groups()[0], None
                        else:
                            field, potentialID = None, None

                    # NOTE(review): name comes from str.strip() above, so it
                    # is never None and this fallback never triggers;
                    # possibly ``if not name`` was meant -- confirm.
                    if name is None:
                        name = potentialID

                    if field:
                        # the value runs up to the next blank line
                        data = []
                        line = lines.next().strip()
                        while line and line != "$$$$":
                            data.append(line)
                            line = lines.next().strip()
                        fields[field] = os.linesep.join(data)
                        if line == "$$$$":
                            # The record ended without a blank line; do not
                            # read past its terminator into the next record.
                            break
                line = lines.next().strip()

            mol = Molecule(_atoms, bonds)
            mol.name = name
            mol.fields = fields
            yield mol, lines.dump(), None

        except StopIteration:
            break
        except Exception:
            # Take the traceback first, while this exception is still the
            # one being handled.
            error_text = traceback.format_exc()

            # Skip the rest of the broken record.
            at_end = False
            try:
                line = lines.current.strip()
                while line[0:4] != "$$$$":
                    line = lines.next().strip()
            except StopIteration:
                at_end = True

            yield None, lines.dump(), error_text
            if at_end:
                return
class MolReader:
    """Line oriented reader for MDL mol / SD files.

    ``file`` may be any iterable of text lines.  The lines consumed since
    the last ``_clear()`` are kept so that the raw text of the most recent
    record can be returned by ``get_text()`` / ``get_lines()``.
    """

    def __init__(self, file, stripHydrogens=1):
        self.file = file
        self.iterator = iter(file)
        # lastlines stores the original lines that made up the
        # last molecule read
        self._lastlines = []
        self.stripHydrogens = stripHydrogens
        # single line pushback buffer, see _pushback()
        self._lastline = None

    def _readline(self, endOk=0):
        """internal readline function, if endOk is 0 then upon an end of
        file a MolReaderError is generated, otherwise None is returned.

        A line handed to _pushback() is returned first."""
        if self._lastline:
            res = self._lastline
            self._lastline = None
            return res

        try:
            # next() builtin: works on Python 2.6+ and Python 3 iterators
            line = next(self.iterator)
            self._lastlines.append(line)
        except StopIteration:
            line = None

        if not line and not endOk:
            raise MolReaderError("Unexpected end of file")
        return line

    def _pushback(self, line):
        """Make ``line`` the result of the next _readline() call."""
        self._lastline = line

    def _clear(self):
        """Clear the _lastlines buffer"""
        self._lastlines = []

    def get_text(self):
        """->text that formed the last molecule read"""
        return "".join(self._lastlines)

    def get_lines(self):
        """->the lines of text that formed the last molecule read"""
        return self._lastlines

    def _read_to_next(self):
        """Skip lines up to and including the next end of molecule line
        (or up to the end of the input)."""
        readline = self._readline
        endOfMol = self._endOfMol
        while 1:
            line = readline(endOk=1)
            if not line or endOfMol(line):
                break

    def _endOfMol(self, line):
        """(line)-> return 1 if the line signifies the end of molecule
        0 otherwise"""
        if line[0:4] == "$$$$":
            return 1
        return 0

    def _readFields(self,
                    pattern=re.compile(r">\s+<([^>]+)>\s+\(*([^)]*)")):
        """Read the database field component at the end of a molecule record.

        Returns (fields, name).  ``fields`` maps each field name to its
        value, the value lines being joined with os.linesep.  ``name`` is
        the ID found on the first header line, or "UNKNOWN (id clash)" when
        a later header line carries a different one.

        Reading stops after the end of molecule line or at the end of the
        input.  A MolReaderError is raised if the input ends in the middle
        of a field value."""
        readline = self._readline
        endOfMol = self._endOfMol

        fields = {}
        name = None
        while 1:
            # by setting endOk = 1 we can read mol files as
            # well as sdfiles
            line = readline(endOk=1)
            if not line or endOfMol(line):
                break
            if line[0] != ">":
                # blank or unrecognised line between data items
                continue

            # we have a header line so get the field
            # and potentialID values
            res = pattern.match(line)
            if res:
                field, potentialID = res.groups()
            else:
                field, potentialID = None, None

            if name is None:
                name = potentialID
            elif name != potentialID:
                name = "UNKNOWN (id clash)"

            if not field:
                continue

            # The value is every following line up to the first blank line.
            # It also stops at an end of molecule line, so a value that is
            # not followed by a blank line is still stored.
            data = []
            line = readline().strip()
            while line and not endOfMol(line):
                data.append(line)
                line = readline().strip()
            fields[field] = os.linesep.join(data)
            if endOfMol(line):
                break

        return fields, name

    def readMProps(self):
        """Read the property lines that follow the bond block.

        Charges listed on "M  CHG" lines are assigned to ``self.atoms``.
        Reading stops after "M  END".  It also stops in front of a data
        header (">") or an end of molecule line; that line is pushed back
        for _readFields()."""
        readline = self._readline
        while 1:
            line = readline()
            if line[0:1] == ">" or self._endOfMol(line):
                # need to push back the last line
                self._pushback(line)
                return

            # Property lines are tagged in their first six columns: an M,
            # two blanks and a three letter code.
            tag = line[0:6]
            if tag == "M  END":
                break
            if tag == "M  CHG":
                # parse the charge line and add charges
                # to the correct atoms
                # ("M  CHG  n  atom charge  atom charge ...")
                groups = line[6:].split()[1:]
                for pos in range(0, len(groups), 2):
                    atomIndex = int(groups[pos]) - 1
                    self.atoms[atomIndex].charge = int(groups[pos + 1])

    def read_one(self):
        """Read one molecule from the sd file

        Returns None at the end of the input and for a record whose counts,
        atom or bond lines cannot be parsed (the rest of such a record is
        skipped)."""
        self._clear()
        readline = self._readline

        try:
            name = readline().strip()
            readline()  # user line, not used
            readline()  # comment line, not used
            line = readline()
        except MolReaderError as msg:
            if str(msg) == "Unexpected end of file":
                return None
            raise

        try:
            numAtoms, numBonds = int(line[0:3]), int(line[3:6])
        except ValueError:
            # XXX FIX ME - trap exceptions and stuff
            print("cannot parse atom, bond line")
            self._read_to_next()
            return None

        atoms = self.atoms = []
        for index in range(numAtoms):
            line = readline()
            try:
                (x, y, z, symbol, mass, charge,
                 stereo, hcount, hcount_fixed) = parse_atom(line)
            except Exception:
                # XXX FIX ME - trap exceptions and stuff
                self._read_to_next()
                return None

            atom = Atom()
            atom._line = line
            atom.symbol = symbol
            atom.explicit_hcount = hcount
            atom.charge = charge
            atom.x = x
            atom.y = y
            atom.z = z
            if hcount_fixed:
                atom.fixed_hcount = 1
            if mass:
                atom.weight = atom.mass + mass
            atom.index = len(atoms)
            atoms.append(atom)
            # NOTE(review): vfgraph and insert_node are not defined in this
            # class; they have to be module level names for this to run --
            # confirm.
            if vfgraph:
                insert_node(atom)

        bonds = []
        for index in range(numBonds):
            line = readline()
            try:
                # NOTE(review): reader() in this file unpacks five values
                # from parse_bond (with a stereo field) and does not shift
                # the atom numbers; one of the two call sites looks out of
                # date -- confirm against parse_bond.
                a1, a2, bondtype, remainder = parse_bond(line)
            except Exception:
                self._read_to_next()
                return None

            # atom numbers are shifted by one to index the atoms list
            atom1 = atoms[a1 - 1]
            atom2 = atoms[a2 - 1]
            symbol, bondorder, bondtype, fixed = BOND_SYMBOL[bondtype]

            if self.stripHydrogens:
                # NOTE(review): while stripping is enabled no Bond is
                # created at all, only hydrogen counts are updated.  This
                # looks unfinished -- confirm the intended behaviour.
                if atom1.symbol == "H":
                    atom2.hcount += 1
                    atom2.hcount_fixed = 1
                if atom2.symbol == "H":
                    atom1.hcount += 1
                    atom1.hcount_fixed = 1
            else:
                bond = Bond(symbol, bondorder, bondtype, fixed)
                bond._line = remainder
                # connect the Atom objects (not their integer positions)
                bond.atoms = [atom1, atom2]
                atom1.bonds.append(bond)
                atom2.bonds.append(bond)
                atom1.oatoms.append(atom2)
                atom2.oatoms.append(atom1)
                bonds.append(bond)

        self.readMProps()
        fields, potentialName = self._readFields()
        # XXX FIX ME, what should happen when both names exist and differ?
        if not name:
            name = potentialName

        # we've tokenized the molecule, now we need to build one
        # XXX FIX ME - Split this up into a builder and a tokenizer ?
        # NOTE(review): the atoms and bonds parsed above are never handed
        # to the Molecule (the code that used to do so was disabled), so
        # the loop below only sees what Molecule() itself starts with.
        mol = Molecule()
        mol.name = name
        mol.fields = fields

        for index, atom in enumerate(mol.atoms):
            assert atom.symbol != "H"
            if len(atom.bonds) > 1:
                atom._closure = 1
            atom.index = index
        return mol