Esempio n. 1
0
def readfile(format, filename):
    """Iterate over the molecules in a file.

    Required parameters:
       format - see the informats variable for a list of available
                input formats
       filename

    Returns an iterator that yields one Molecule per entry read.

    Raises IOError if filename is not an existing file, and ValueError
    if format is not a recognised CDK format.

    You can access the first molecule in a file using next() on the
    iterator:
        mol = next(readfile("smi", "myfile.smi"))

    You can make a list of the molecules in a file using:
        mols = list(readfile("smi", "myfile.smi"))

    You can iterate over the molecules in a file as shown in the
    following code snippet:
    >>> atomtotal = 0
    >>> for mol in readfile("sdf", "head.sdf"):
    ...     atomtotal += len(mol.atoms)
    ...
    >>> print(atomtotal)
    43
    """
    format = format.lower()
    if not os.path.isfile(filename):
        # Call-form raise is valid on both Python 2 and Python 3.
        raise IOError("No such file: '%s'" % filename)
    builder = cdk.DefaultChemObjectBuilder.getInstance()
    if format == "sdf":
        return (Molecule(mol) for mol in cdk.io.iterator.IteratingSDFReader(
            java.io.FileInputStream(java.io.File(filename)),
            builder))
    elif format == "smi":
        return (Molecule(mol) for mol in cdk.io.iterator.IteratingSmilesReader(
            java.io.FileInputStream(java.io.File(filename)),
            builder))
    elif format == 'inchi':
        # Open eagerly so that a failure to open surfaces at call time,
        # exactly as before; the generator below owns closing the handle.
        inputfile = open(filename, 'rb')

        def inchi_molecules():
            # One InChI per line; the finally clause closes the file when
            # the iterator is exhausted, closed, or garbage-collected after
            # it has been started.
            try:
                for line in inputfile:
                    yield readstring('inchi', line.rstrip())
            finally:
                inputfile.close()

        return inchi_molecules()
    elif format in informats:
        reader = _informats[format](java.io.FileInputStream(java.io.File(filename)))
        chemfile = reader.read(cdk.ChemFile())
        manip = cdk.tools.manipulator.ChemFileManipulator
        # Only the first atom container is used.  Wrap it in a real 1-tuple:
        # "iter(x,)" is just "iter(x)" and would iterate over the molecule
        # itself instead of yielding it.
        return iter((Molecule(manip.getAllAtomContainers(chemfile)[0]),))
    else:
        raise ValueError("%s is not a recognised CDK format" % format)
Esempio n. 2
0
def readstring(format, string):
    """Build a Molecule from its textual representation.

    Required parameters:
       format - see the informats variable for a list of available
                input formats (matched case-insensitively)
       string - the text to parse

    Raises IOError if a SMILES string is rejected by the parser, and
    ValueError if format is not a recognised CDK format.

    Example:
    >>> input = "C1=CC=CS1"
    >>> mymol = readstring("smi", input)
    >>> len(mymol.atoms)
    5
    """
    format = format.lower()

    if format == "smi":
        parser_cls = cdk.smiles.SmilesParser
        parser = parser_cls(cdk.DefaultChemObjectBuilder.getInstance())
        try:
            parsed = parser.parseSmiles(string)
        except InvalidSmilesException as ex:
            reason = ex
            if not sys.platform.startswith("java"):
                # Not running on Jython, so this is a JPype exception:
                # report the text returned by its message() method.
                reason = ex.message()
            raise IOError(reason)
        return Molecule(parsed)

    if format == 'inchi':
        inchi_factory = cdk.inchi.InChIGeneratorFactory.getInstance()
        converter = inchi_factory.getInChIToStructure(
            string, cdk.DefaultChemObjectBuilder.getInstance())
        return Molecule(converter.getAtomContainer())

    if format not in informats:
        raise ValueError("%s is not a recognised CDK format" % format)

    # Generic path: feed the string to the CDK reader registered for this
    # format and keep only the first atom container it produces.
    source = java.io.StringReader(string)
    chemfile = _informats[format](source).read(cdk.ChemFile())
    containers = cdk.tools.manipulator.ChemFileManipulator.getAllAtomContainers(chemfile)
    return Molecule(containers[0])
Esempio n. 3
0
        try:
            ans = sp.parseSmiles(string)
        except InvalidSmilesException, ex:
            if sys.platform[:4] != "java":
                #Jpype exception
                ex = ex.message()
            raise IOError, ex
        return Molecule(ans)
    elif format == 'inchi':
        factory = cdk.inchi.InChIGeneratorFactory.getInstance()
        intostruct = factory.getInChIToStructure(
            string, cdk.DefaultChemObjectBuilder.getInstance())
        return Molecule(intostruct.getAtomContainer())
    elif format in informats:
        reader = _informats[format](java.io.StringReader(string))
        chemfile = reader.read(cdk.ChemFile())
        manip = cdk.tools.manipulator.ChemFileManipulator
        return Molecule(manip.getAllAtomContainers(chemfile)[0])
    else:
        raise ValueError, "%s is not a recognised CDK format" % format


class Outputfile(object):
    """Represent a file to which *output* is to be sent.

    Required parameters:
       format - see the outformats variable for a list of available
                output formats
       filename

    Optional parameters: