def readfile(format, filename):
    """Iterate over the molecules in a file.

    Required parameters:
       format - see the informats variable for a list of available
                input formats (matched case-insensitively)
       filename

    Returns an iterator of Molecule objects.  "sdf" and "smi" files are
    streamed through the CDK iterating readers, "inchi" files are read
    one InChI per line, and for every other supported format only the
    first atom container found in the file is yielded.

    Raises:
       IOError - if filename does not refer to an existing file
       ValueError - if format is not a recognised CDK format

    You can access the first molecule in a file using the next()
    function on the iterator:
        mol = next(readfile("smi", "myfile.smi"))

    You can make a list of the molecules in a file using:
        mols = list(readfile("smi", "myfile.smi"))

    You can iterate over the molecules in a file as shown in the
    following code snippet:
    >>> atomtotal = 0
    >>> for mol in readfile("sdf", "head.sdf"):
    ...     atomtotal += len(mol.atoms)
    ...
    >>> print(atomtotal)
    43
    """
    format = format.lower()
    if not os.path.isfile(filename):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise IOError("No such file: '%s'" % filename)
    builder = cdk.DefaultChemObjectBuilder.getInstance()
    if format == "sdf":
        return (Molecule(mol) for mol in cdk.io.iterator.IteratingSDFReader(
            java.io.FileInputStream(java.io.File(filename)),
            builder))
    elif format == "smi":
        return (Molecule(mol) for mol in cdk.io.iterator.IteratingSmilesReader(
            java.io.FileInputStream(java.io.File(filename)),
            builder))
    elif format == 'inchi':
        # Opened eagerly (as before); the helper closes it once exhausted.
        return _iter_inchi_lines(open(filename, 'rb'))
    elif format in informats:
        reader = _informats[format](
            java.io.FileInputStream(java.io.File(filename)))
        chemfile = reader.read(cdk.ChemFile())
        manip = cdk.tools.manipulator.ChemFileManipulator
        # Wrap the molecule in a real one-element tuple.  The previous
        # "iter(Molecule(...),)" was just iter(Molecule(...)): the trailing
        # comma sat inside the call, so the Molecule itself was iterated.
        return iter((Molecule(manip.getAllAtomContainers(chemfile)[0]),))
    else:
        raise ValueError("%s is not a recognised CDK format" % format)


def _iter_inchi_lines(inputfile):
    """Yield a Molecule per line of an open InChI file, then close the file."""
    try:
        for line in inputfile:
            yield readstring('inchi', line.rstrip())
    finally:
        # Runs when the generator is exhausted, closed or garbage collected.
        inputfile.close()
def readstring(format, string):
    """Build a single Molecule from its textual representation.

    Required parameters:
       format - see the informats variable for a list of available
                input formats (matched case-insensitively)
       string

    "smi" input is parsed with the CDK SmilesParser and "inchi" input is
    converted through the CDK InChIGeneratorFactory; any other supported
    format is read with its registered reader and the first atom
    container found is returned.

    Raises:
       IOError - if a SMILES string cannot be parsed
       ValueError - if format is not a recognised CDK format

    Example:
    >>> input = "C1=CC=CS1"
    >>> mymol = readstring("smi", input)
    >>> len(mymol.atoms)
    5
    """
    fmt = format.lower()

    if fmt == "smi":
        builder = cdk.DefaultChemObjectBuilder.getInstance()
        parser = cdk.smiles.SmilesParser(builder)
        try:
            parsed = parser.parseSmiles(string)
        except InvalidSmilesException as ex:
            # Outside Jython the exception is a JPype wrapper; unwrap its text.
            if not sys.platform.startswith("java"):
                ex = ex.message()
            raise IOError(ex)
        return Molecule(parsed)

    if fmt == 'inchi':
        inchi_factory = cdk.inchi.InChIGeneratorFactory.getInstance()
        builder = cdk.DefaultChemObjectBuilder.getInstance()
        converter = inchi_factory.getInChIToStructure(string, builder)
        return Molecule(converter.getAtomContainer())

    if fmt not in informats:
        raise ValueError("%s is not a recognised CDK format" % fmt)

    # Generic path: use the reader registered for this format.
    reader_class = _informats[fmt]
    reader = reader_class(java.io.StringReader(string))
    chem_file = reader.read(cdk.ChemFile())
    containers = cdk.tools.manipulator.ChemFileManipulator.getAllAtomContainers(
        chem_file)
    return Molecule(containers[0])
try: ans = sp.parseSmiles(string) except InvalidSmilesException, ex: if sys.platform[:4] != "java": #Jpype exception ex = ex.message() raise IOError, ex return Molecule(ans) elif format == 'inchi': factory = cdk.inchi.InChIGeneratorFactory.getInstance() intostruct = factory.getInChIToStructure( string, cdk.DefaultChemObjectBuilder.getInstance()) return Molecule(intostruct.getAtomContainer()) elif format in informats: reader = _informats[format](java.io.StringReader(string)) chemfile = reader.read(cdk.ChemFile()) manip = cdk.tools.manipulator.ChemFileManipulator return Molecule(manip.getAllAtomContainers(chemfile)[0]) else: raise ValueError, "%s is not a recognised CDK format" % format class Outputfile(object): """Represent a file to which *output* is to be sent. Required parameters: format - see the outformats variable for a list of available output formats filename Optional parameters: