Beispiel #1
0
    def do_closure(self, text):
        """Process a ring-closure token, opening or completing a closure.

        ``text`` is passed through ``normalize_closure`` to obtain the key
        used in ``self.closures``.  The first time a key is seen, the
        current atom (``self._prev_atoms[-1]``) and the pending bond are
        stored under it.  The next time the same key is seen, the stored
        atom is bonded to the current atom and the entry is removed.

        Raises AssertionError when both ends carry explicit bonds whose
        symbols are incompatible, or when the closure would bond an atom
        to itself.  On normal return the pending bond is reset to
        ``implicit_bond``.
        """
        num = normalize_closure(text)
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if num in self.closures:
            prev_atom, bond = self.closures.pop(num)

            assert self._pending_bond is not None, "Can't happen"

            if self._pending_bond is not implicit_bond and \
               bond is not implicit_bond and \
               self._pending_bond.symbol != "-":  # according to toolkit

                # Both ends are explicit: their symbols must be equal, or be
                # the "/" and "\" pair in either order.  The bond stored when
                # the closure was opened is the one that is kept.
                prev_symbol = bond.symbol
                symbol = self._pending_bond.symbol
                compatible = (
                    (prev_symbol == symbol) or
                    (prev_symbol == "/" and symbol == "\\") or
                    (prev_symbol == "\\" and symbol == "/")
                )
                if not compatible:
                    raise AssertionError("bond types don't match")
            elif bond is implicit_bond and self._pending_bond is not implicit_bond:
                # only the closing end is explicit, so keep that bond
                bond = self._pending_bond
            elif bond is implicit_bond:
                # both are implicit so make a new one
                bond = Bond()

            bond._closure = 1
            atom = self._prev_atoms[-1]
            if prev_atom is atom:
                raise AssertionError("cannot close a ring with itself")

            # The list is first updated in place and then rebound, exactly as
            # before.  NOTE(review): the in-place write looks redundant unless
            # something else holds a reference to the old list -- confirm.
            bond.atoms[:] = [prev_atom, atom]
            prev_atom._closure = 1
            atom._closure = 1
            ##self.mol.add_bond(bond, prev_atom, atom)

            bond.atoms = [prev_atom, atom]
            atom.bonds.append(bond)
            prev_atom.bonds.append(bond)
            atom.oatoms.append(prev_atom)
            prev_atom.oatoms.append(atom)
            self.bonds.append(bond)
            if self.vfgraph:
                # register the bond in both directions
                index1, index2 = prev_atom.index, atom.index
                insert_edge = self.insert_edge
                insert_edge(index1, index2, bond)
                insert_edge(index2, index1, bond)

        else:
            # First occurrence: remember where the closure was opened and
            # which bond (possibly implicit_bond) was pending at that point.
            self.closures[num] = (self._prev_atoms[-1], self._pending_bond)
        self._pending_bond = implicit_bond
Beispiel #2
0
    def do_closure(self, text):
        """Process a ring-closure token, opening or completing a closure.

        ``text`` is passed through ``normalize_closure`` to obtain the key
        used in ``self.closures``.  The first time a key is seen, the
        current atom (``self._prev_atoms[-1]``) and the pending bond are
        stored under it.  The next time the same key is seen, the stored
        atom is bonded to the current atom and the entry is removed.

        Raises AssertionError when both ends carry explicit bonds whose
        symbols are incompatible, or when the closure would bond an atom
        to itself.  On normal return the pending bond is reset to
        ``implicit_bond``.
        """
        num = normalize_closure(text)
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if num in self.closures:
            prev_atom, bond = self.closures.pop(num)

            assert self._pending_bond is not None, "Can't happen"

            if self._pending_bond is not implicit_bond and \
               bond is not implicit_bond and \
               self._pending_bond.symbol != "-":  # according to toolkit

                # Both ends are explicit: their symbols must be equal, or be
                # the "/" and "\" pair in either order.  The bond stored when
                # the closure was opened is the one that is kept.
                prev_symbol = bond.symbol
                symbol = self._pending_bond.symbol
                compatible = (
                    (prev_symbol == symbol) or
                    (prev_symbol == "/" and symbol == "\\") or
                    (prev_symbol == "\\" and symbol == "/")
                )
                if not compatible:
                    raise AssertionError("bond types don't match")
            elif bond is implicit_bond and self._pending_bond is not implicit_bond:
                # only the closing end is explicit, so keep that bond
                bond = self._pending_bond
            elif bond is implicit_bond:
                # both are implicit so make a new one
                bond = Bond()

            bond._closure = 1
            atom = self._prev_atoms[-1]
            if prev_atom is atom:
                raise AssertionError("cannot close a ring with itself")

            # The list is first updated in place and then rebound, exactly as
            # before.  NOTE(review): the in-place write looks redundant unless
            # something else holds a reference to the old list -- confirm.
            bond.atoms[:] = [prev_atom, atom]
            prev_atom._closure = 1
            atom._closure = 1
            ##self.mol.add_bond(bond, prev_atom, atom)

            bond.atoms = [prev_atom, atom]
            atom.bonds.append(bond)
            prev_atom.bonds.append(bond)
            atom.oatoms.append(prev_atom)
            prev_atom.oatoms.append(atom)
            self.bonds.append(bond)
            if self.vfgraph:
                # register the bond in both directions
                index1, index2 = prev_atom.index, atom.index
                insert_edge = self.insert_edge
                insert_edge(index1, index2, bond)
                insert_edge(index2, index1, bond)

        else:
            # First occurrence: remember where the closure was opened and
            # which bond (possibly implicit_bond) was pending at that point.
            self.closures[num] = (self._prev_atoms[-1], self._pending_bond)
        self._pending_bond = implicit_bond
Beispiel #3
0
 def do_bond(self, text):
     """Turn an explicit bond token into the pending bond.

     ``text`` is looked up in ``BONDLOOKUP``; the resulting bond order,
     bond type, equivalence class and stereo value are used to build a
     ``Bond`` that is stored in ``self._pending_bond``.  The caller must
     not already have an explicit bond pending.
     """
     assert self._pending_bond in (implicit_bond, None)
     _symbol, order, kind, equivalence, stereo_flag = BONDLOOKUP[text]

     # Bonds are created "fixed" unless the lookup reports bond type 4
     # (aromatic), which is not expected to arrive through this path.
     is_fixed = 1
     if kind == 4:
         assert 0, "Bond's shouldn't come in as ':'"
         is_fixed = 0

     pending = Bond(text, order, kind, is_fixed, stereo_flag)
     pending.equiv_class = equivalence
     self._pending_bond = pending
Beispiel #4
0
 def do_bond(self, text):
     """Turn an explicit bond token into the pending bond.

     ``text`` is looked up in ``BONDLOOKUP``; the resulting bond order,
     bond type, equivalence class and stereo value are used to build a
     ``Bond`` that is stored in ``self._pending_bond``.  The caller must
     not already have an explicit bond pending.
     """
     assert self._pending_bond in (implicit_bond, None)
     _symbol, order, kind, equivalence, stereo_flag = BONDLOOKUP[text]

     # Bonds are created "fixed" unless the lookup reports bond type 4
     # (aromatic), which is not expected to arrive through this path.
     is_fixed = 1
     if kind == 4:
         assert 0, "Bond's shouldn't come in as ':'"
         is_fixed = 0

     pending = Bond(text, order, kind, is_fixed, stereo_flag)
     pending.equiv_class = equivalence
     self._pending_bond = pending
Beispiel #5
0
def addHydrogens(mol):
    """Add one explicit 'H' atom per counted hydrogen and return ``mol``.

    Does nothing when ``mol.explicitHydrogens`` is already true.
    Otherwise, for every atom, ``atom.hcount`` new atoms with symbol 'H'
    are added through ``mol.add_atom`` and bonded to it through
    ``mol.add_bond``.  Afterwards every atom in ``mol.atoms`` is
    renumbered sequentially and ``mol.explicitHydrogens`` is set to 1.

    The molecule is modified in place; the same object is returned.
    """
    if mol.explicitHydrogens:
        return mol

    # Iterate over a snapshot: add_atom is called inside the loop, and
    # walking mol.atoms directly would also visit the atoms added here if
    # add_atom appends to that list.
    for atom in list(mol.atoms):
        for _ in range(atom.hcount):
            hatom = Atom()
            hatom.symbol = 'H'

            # TODO: are the coordinates of the hydrogen stored somewhere in
            # the atom it is attached to?  hatom.x / y / z are left unset.
            # TODO: does atom.hcount need to be decremented here?

            mol.add_atom(hatom)
            mol.add_bond(Bond(), atom, hatom)

    # reset atom indices
    for index, atom in enumerate(mol.atoms):
        atom.index = index

    # flag that hydrogens are now explicit in this molecule
    mol.explicitHydrogens = 1
    return mol
Beispiel #6
0
    def add_atom(self, atom):
        """Append ``atom`` and bond it to the previous atom when a bond is pending.

        The atom receives the next index in ``self.atoms``.  A pending
        bond equal to ``implicit_bond`` is replaced by a fresh ``Bond()``;
        any pending bond other than None then connects the atom to
        ``self._prev_atoms[-1]``.  Afterwards the pending bond is reset to
        ``implicit_bond`` and ``atom`` becomes the current previous atom.
        """
        atom.index = len(self.atoms)
        self.atoms.append(atom)
        if self.vfgraph:
            self.insert_node(atom)

        if self._pending_bond == implicit_bond:
            # Implicit single or aromatic bond
            self._pending_bond = Bond()

        link = self._pending_bond
        if link is not None:
            previous = self._prev_atoms[-1]
            ends = [previous, atom]
            # in-place update followed by a rebind, as before
            link.atoms[:] = ends
            link.atoms = [previous, atom]

            atom.bonds.append(link)
            previous.bonds.append(link)
            atom.oatoms.append(previous)
            previous.oatoms.append(atom)
            self.bonds.append(link)

            if self.vfgraph:
                # register the bond in both directions
                tail, head = previous.index, atom.index
                add_edge = self.insert_edge
                add_edge(tail, head, link)
                add_edge(head, tail, link)

        self._pending_bond = implicit_bond
        if self._prev_atoms:
            self._prev_atoms[-1] = atom
        else:
            self._prev_atoms.append(atom)
Beispiel #7
0
def reader(file, stripHydrogens=1):
    """Generate molecules from an SD-style file, one record per iteration.

    Each iteration yields a 3-tuple:

      (mol, text, None)       when a record was parsed successfully
      (None, text, traceback) when parsing a record raised an exception;
                              the input is then skipped forward to the
                              next line starting with "$$$$"

    ``text`` is whatever ``lines.dump()`` returns (presumably the raw text
    of the record just consumed -- confirm against ``collector``).

    When ``stripHydrogens`` is true, atoms whose symbol is "H" are not
    created; every bond to such an atom increments the neighbour's
    ``explicit_hcount`` instead.

    Iteration ends when the input runs out, including while skipping
    forward after a failed record.
    """
    lines = collector(file)

    while 1:
        try:
            fields = {}
            name = lines.next().strip()
            lines.next()  # user line, not used
            lines.next()  # comment line, not used
            molinfo = lines.next()
            numAtoms, numBonds = int(molinfo[0:3]), int(molinfo[3:6])

            atoms = []   # one slot per atom line; None for stripped hydrogens
            _atoms = []  # only the atoms that were actually created
            for index in range(numAtoms):
                line = lines.next()
                x,y,z,symbol,mass,charge,stereo,hcount,hcount_fixed = parse_atom(line)
                if symbol == "H" and stripHydrogens:
                    atoms.append(None)
                    continue

                atom = Atom()
                atoms.append(atom)
                _atoms.append(atom)
                atom.set_symbol(symbol)
                atom.explicit_hcount = hcount
                atom.charge = charge
                atom._line = line
                atom.x = x
                atom.y = y
                atom.z = z
                if hcount_fixed:
                    print("hcount fixed")
                    atom.fixed_hcount = 1 # oops, we shouldn't use this
                    atom.has_explicit_hcount = True
                if mass:
                    atom.weight = atom.mass + mass

                # position among the atoms that were kept
                atom.index = len(_atoms) - 1

            bonds = []
            for index in range(numBonds):
                line = lines.next()
                a1, a2, bondtype, stereo, remainder = parse_bond(line)
                symbol, bondorder, bondtype, fixed = BOND_SYMBOL[bondtype]

                atom1, atom2 = atoms[a1], atoms[a2]

                if atom1 is not None and atom2 is not None:
                    bond = Bond(symbol, bondorder, bondtype, fixed)
                    bonds.append(bond)
                    bond._line = remainder
                    bond.index = index
                    bond.atoms = [atom1, atom2]
                    try:
                        bond.stereo = BOND_LOOKUP_STEREO[bondtype-1][stereo]
                    except KeyError:
                        raise MolReaderError("An SD record cannot have a bondtype of %s and a stereo value of %s"%(bondtype, stereo))
                    except IndexError:
                        # Diagnostic output before re-raising.  The old code
                        # referenced a misspelled name here, which replaced
                        # the IndexError with a NameError.
                        print("*"*44)
                        print(line)
                        print("bondtype, stereo %s %s" % (bondtype, stereo))
                        raise

                    atom1.bonds.append(bond)
                    atom2.bonds.append(bond)
                    atom1.oatoms.append(atom2)
                    atom2.oatoms.append(atom1)
                    if atom1.symbol == "H": atom2.explicit_hcount += 1
                    if atom2.symbol == "H": atom1.explicit_hcount += 1
                else:
                    # one end is a stripped hydrogen: count it on the other
                    if atom1 is None and atom2 is not None:
                        atom2.explicit_hcount += 1
                    elif atom2 is None and atom1 is not None:
                        atom1.explicit_hcount += 1

            ##############################################################
            # read the mprops if necessary
            line = lines.next().strip()
            while 1:
                if line[0:6] == "M  END":
                    line = lines.next().strip()
                    break
                elif line[0:6] == "M  CHG":
                    # "M  CHG <count> <atom> <charge> <atom> <charge> ..."
                    # Atom numbers are 1-based positions in the atom block,
                    # so the full `atoms` list is indexed, not `_atoms`.
                    groups = line[6:].split()[1:]
                    for pos in range(0, len(groups), 2):
                        target = atoms[int(groups[pos]) - 1]
                        charge = int(groups[pos + 1])
                        if target is not None:  # stripped hydrogen
                            target.charge = charge
                elif line[0:1] == ">" or line[0:4] == "$$$$":
                    break
                line = lines.next().strip()

            #############################################################
            # read the fields if necessary
            while line != "$$$$":
                if line and line[0] == ">":
                    res = FIELDPATTERN.match(line)
                    if res:
                        field, potentialID = res.groups()
                    else:
                        res = ALTFIELDPATTERN.match(line)
                        if res:
                            field = res.groups()[0]
                            potentialID = None
                        else:
                            field, potentialID = None, None

                    # NOTE(review): name comes from str.strip() above and so
                    # is never None here; `not name` may have been intended.
                    if name is None: name = potentialID

                    if field:
                        data = []
                        line = lines.next().strip()
                        while line and line != "$$$$":
                            data.append(line)
                            line = lines.next().strip()

                        fields[field] = os.linesep.join(data)
                        if line == "$$$$":
                            # The data block ran straight into the record
                            # terminator; do not read past it.
                            break

                line = lines.next().strip()

            mol = Molecule(_atoms, bonds)
            mol.name = name
            mol.fields = fields
            yield mol, lines.dump(), None

        except StopIteration:
            break
        except Exception:
            # Capture the traceback before doing any further I/O.
            report = traceback.format_exc()
            try:
                line = lines.current.strip()
                while line[0:4] != "$$$$":
                    line = lines.next().strip()
            except StopIteration:
                # Input ended while skipping the broken record.
                return
            yield None, lines.dump(), report
Beispiel #8
0
class MolReader:
    """Line-oriented reader for MDL mol / SD file records.

    The reader pulls lines from ``iter(file)``, keeps the raw lines of the
    record currently being read (see ``get_text`` / ``get_lines``) and
    supports pushing back a single line.

    NOTE(review): ``read_one`` still relies on names this class does not
    define and builds its result with a bare ``Molecule()``; see the
    notes inside that method.
    """

    def __init__(self, file, stripHydrogens=1):
        self.file = file
        self.iterator = iter(file)
        # original lines that made up the last molecule read
        self._lastlines = []
        self.stripHydrogens = stripHydrogens
        # single-line push-back buffer, filled by _pushback
        self._lastline = None

    def _readline(self, endOk=0):
        """Return the next line, preferring a pushed-back line.

        Lines taken from the iterator are recorded in ``_lastlines``.  At
        the end of the input (or on an empty line value) None/'' is
        returned when ``endOk`` is true; otherwise MolReaderError is
        raised.
        """
        if self._lastline:
            res = self._lastline
            self._lastline = None
            return res

        try:
            # builtin next() works with both Python 2.6+ and Python 3 iterators
            line = next(self.iterator)
        except StopIteration:
            line = None
        else:
            self._lastlines.append(line)

        if not line and not endOk:
            raise MolReaderError("Unexpected end of file")

        return line

    def _pushback(self, line):
        """Make ``line`` the next value returned by ``_readline``."""
        self._lastline = line

    def _clear(self):
        """Clear the _lastlines buffer"""
        self._lastlines = []

    def get_text(self):
        """->text that formed the last molecule read"""
        return "".join(self._lastlines)

    def get_lines(self):
        """->the lines of text that formed the last molecule read"""
        return self._lastlines

    def _read_to_next(self):
        """Consume lines up to and including the next end-of-molecule line,
        or until the input is exhausted."""
        while 1:
            line = self._readline(endOk=1)
            if not line or self._endOfMol(line):
                break

    def _endOfMol(self, line):
        """(line)-> return 1 if the line signifies the end of molecule
        0 otherwise"""
        if line[0:4] == "$$$$":
            return 1
        return 0

    def _readFields(self,
                    pattern=re.compile(r">\s+<([^>]+)>\s+\(*([^)]*)")
                    ):
        """Read the data fields that follow a molecule record.

        Returns ``(fields, name)``: ``fields`` maps each field name to its
        data lines joined with ``os.linesep``; ``name`` is the ID found in
        the field headers, "UNKNOWN (id clash)" when headers disagree, or
        None when no header was seen.
        """
        readline = self._readline
        endOfMol = self._endOfMol

        fields = {}

        name = None
        while 1:
            # by setting endOk = 1 we can read mol files as
            # well as sdfiles
            line = readline(endOk=1)
            if not line:
                break

            if endOfMol(line):
                break
            elif line[0] == ">":
                # we have a data line so get the field
                #  and potentialID values
                res = pattern.match(line)
                if res:
                    field, potentialID = res.groups()
                else:
                    field, potentialID = None, None

                if name is None:
                    name = potentialID
                elif name != potentialID:
                    name = "UNKNOWN (id clash)"

                # read the data from the following lines
                if field:
                    line = readline().strip()
                    data = []
                    # Stop at the record terminator as well as at a blank
                    # line, so a data block that is not followed by a blank
                    # line cannot swallow "$$$$" and the next record.
                    while line and not endOfMol(line):
                        data.append(line)
                        line = readline().strip()

                    fields[field] = os.linesep.join(data)
                    if endOfMol(line):
                        break

        # `line` is None when the input simply ended (plain mol file);
        # guard before slicing it in _endOfMol.
        if line is not None and not endOfMol(line):
            # by setting endok = 1 here we can read
            # mol files as well as sd files
            line = readline(endOk=1)

        return fields, name

    def readMProps(self):
        """Read the property lines up to "M  END", applying "M  CHG" charges.

        A line starting with ">" is pushed back and ends the scan, so that
        ``_readFields`` sees it.  Charges are written to ``self.atoms``
        using the 1-based atom numbers found on the line.
        """
        readline = self._readline
        while 1:
            line = readline()
            if line[0] == ">":
                # need to push back the last line
                self._pushback(line)
                return

            if line[0:6] == "M  END":
                break

            if line[0:6] == "M  CHG":
                # parse the charge line and add charges
                # to the correct atoms
                groups = line[6:].split()[1:]
                for pos in range(0, len(groups), 2):
                    atomIndex = int(groups[pos]) - 1
                    charge = int(groups[pos + 1])
                    self.atoms[atomIndex].charge = charge

    def read_one(self):
        """Read one molecule from the sd file.

        Returns None when the input ends in the header block, when the
        counts line cannot be parsed, or when an atom or bond line cannot
        be parsed (the rest of that record is skipped first).
        """
        self._clear()
        readline = self._readline
        try:
            name = readline().strip()
            readline()  # user line, not used
            readline()  # comment line, not used
            line = readline()
        except MolReaderError as msg:
            if str(msg) == "Unexpected end of file":
                return None
            raise

        try:
            numAtoms, numBonds = map(int, (line[0:3], line[3:6]))
        except ValueError: # XXX FIX ME - trap exceptions and stuff
            print("cannot parse atom, bond line")
            self._read_to_next()
            return None
        atoms = self.atoms = []

        for index in range(numAtoms):
            line = readline()
            try:
                x,y,z,symbol,mass,charge,stereo,hcount,hcount_fixed = parse_atom(line)
            except Exception: # XXX FIX ME - trap exceptions and stuff
                self._read_to_next()
                return None

            atom = Atom()
            atom._line = line
            atom.symbol = symbol
            atom.explicit_hcount = hcount
            atom.charge = charge
            atom.x = x
            atom.y = y
            atom.z = z
            if hcount_fixed: atom.fixed_hcount = 1
            if mass:
                atom.weight = atom.mass + mass
            atom.index = len(atoms)
            atoms.append(atom)
            # NOTE(review): `vfgraph` and `insert_node` are not defined by
            # this class; unless they exist at module level this raises
            # NameError on the first atom -- confirm and remove or rebind.
            if vfgraph:
                insert_node(atom)

        bonds = []
        for index in range(numBonds):
            line = readline()
            try:
                a1, a2, bondtype, remainder = parse_bond(line)
            except Exception:
                self._read_to_next()
                return None
            # atom numbers in the file are 1-based
            a1 -= 1
            a2 -= 1

            symbol, bondorder, bondtype, fixed = BOND_SYMBOL[bondtype]
            atom1 = atoms[a1]
            atom2 = atoms[a2]
            # Was the undefined name `stripHydrogen`; the flag is stored on
            # the instance by __init__.
            if self.stripHydrogens:
                # NOTE(review): while stripping, no Bond is created at all,
                # not even between two non-hydrogen atoms -- confirm intent.
                if atom1.symbol == "H":
                    atom2.hcount += 1
                    atom2.hcount_fixed = 1
                if atom2.symbol == "H":
                    atom1.hcount += 1
                    atom1.hcount_fixed = 1
            else:
                bond = Bond(symbol, bondorder, bondtype, fixed)
                # XXX FIX ME
                # a really bad hack here
                # ignore please!
                bond._line = remainder
                # Link the atom objects.  The old code used the integer
                # indices a1/a2 here, which have no .bonds/.oatoms, and
                # recorded each atom as its own neighbour.
                bond.atoms = [atom1, atom2]
                atom1.bonds.append(bond)
                atom2.bonds.append(bond)
                atom1.oatoms.append(atom2)
                atom2.oatoms.append(atom1)
                bonds.append(bond)

        self.readMProps()

        fields, potentialName = self._readFields()

        if not name:
            name = potentialName
        elif name != potentialName:
            # XXX FIX ME, what do I do here?
            pass

        # we've tokenized the molecule, now we need to build one
        # XXX FIX ME - Split this up into a builder and a tokenizer ?
        # NOTE(review): `atoms` and `bonds` are never handed to the
        # Molecule.  An earlier implementation, previously kept here as
        # commented-out code, called mol.add_atom / mol.add_bond, folded
        # singly-bonded hydrogens into their neighbour's hcount, marked
        # atoms on bondtype-4 bonds as aromatic and removed leftover
        # hydrogens.  None of that is active.
        mol = Molecule()
        mol.name = name
        mol.fields = fields

        for index, atom in enumerate(mol.atoms):
            assert atom.symbol != "H"
            if len(atom.bonds) > 1:
                atom._closure = 1
            atom.index = index

        return mol