Esempio n. 1
0
def WriteSDF(df,
             out,
             molColName='ROMol',
             idName=None,
             properties=None,
             allNumeric=False):
    '''Write an SD file for the molecules in the dataframe.

    One molecule is written per dataframe row. Dataframe columns can be
    exported as SDF tags if specified in the "properties" list;
    "properties=list(df.columns)" would export all columns.

    Arguments:
      df: dataframe holding the molecules and the columns to export.
      out: destination handed to SDWriter. If it is a string ending in ".gz"
        (case-insensitive) the file is opened with gzip and the resulting
        handle is handed to SDWriter instead; that handle is closed before
        returning.
      molColName: name of the column holding the molecules.
      idName: column used as molecule title ("_Name"). It can be set to
        "RowID" to use the dataframe row key as title.
      properties: iterable of column names to export as SDF tags. The
        caller's object is never modified.
      allNumeric: if true, every column with a floating-point or integer
        dtype is added to the exported properties. User has to make sure
        that the correct data type is assigned to each column.
    '''
    # Work on a private copy so the caller's list is left untouched.
    properties = [] if properties is None else list(properties)
    if allNumeric:
        for column in df.dtypes.keys():
            dtype = df.dtypes[column]
            # Compare against the abstract NumPy kinds: the builtin float/int
            # only match the default-width dtypes on current NumPy releases.
            is_numeric = (np.issubdtype(dtype, np.floating)
                          or np.issubdtype(dtype, np.integer))
            # Skip columns that were already requested explicitly so the
            # same tag is not listed twice.
            if is_numeric and column not in properties:
                properties.append(column)

    # The molecule column and the title column are never exported as tags.
    properties = [p for p in properties if p != molColName and p != idName]

    close = None
    if isinstance(out, string_types):
        if out.lower()[-3:] == ".gz":
            import gzip
            # NOTE(review): binary mode kept as in the original; confirm that
            # SDWriter accepts a binary stream on the targeted Python version.
            out = gzip.open(out, "wb")
            close = out.close

    try:
        writer = SDWriter(out)
        try:
            writer.SetProps(properties)
            for index, row in df.iterrows():
                # make a local copy that can be modified
                mol = Chem.Mol(row[molColName])

                if idName is not None:
                    if idName == 'RowID':
                        mol.SetProp('_Name', str(index))
                    else:
                        mol.SetProp('_Name', str(row[idName]))

                for p in properties:
                    cell_value = row[p]
                    # Make sure float does not get formatted in E notation
                    if np.issubdtype(type(cell_value), np.floating):
                        # "f" will show 7.0 as 7.000000
                        text = '{:f}'.format(cell_value).rstrip("0")
                        if text.endswith("."):
                            # put the "0" back on if it's something like "7."
                            text += "0"
                        mol.SetProp(p, text)
                    else:
                        mol.SetProp(p, str(cell_value))
                writer.write(mol)
        finally:
            # Close the writer even when a row fails.
            writer.close()
    finally:
        if close is not None:
            close()
Esempio n. 2
0
def WriteSDF(df,
             out,
             molColName='ROMol',
             idName=None,
             properties=None,
             allNumeric=False):
    '''Write an SD file for the molecules in the dataframe.

    One molecule is written per dataframe row. Dataframe columns can be
    exported as SDF tags if specified in the "properties" list;
    "properties=list(df.columns)" would export all columns.

    Arguments:
      df: dataframe holding the molecules and the columns to export.
      out: destination handed to SDWriter.
      molColName: name of the column holding the molecules.
      idName: column used as molecule title ("_Name"). It can be set to
        "RowID" to use the dataframe row key as title.
      properties: iterable of column names to export as SDF tags. The
        caller's object is never modified.
      allNumeric: if true, every column with a floating-point or integer
        dtype is added to the exported properties. User has to make sure
        that the correct data type is assigned to each column.
    '''
    # Work on a private copy: the list is extended and filtered below and
    # the caller's list must not change as a side effect.
    properties = [] if properties is None else list(properties)
    if allNumeric:
        for column in df.dtypes.keys():
            dtype = df.dtypes[column]
            # Compare against the abstract NumPy kinds: the builtin float/int
            # only match the default-width dtypes on current NumPy releases.
            is_numeric = (np.issubdtype(dtype, np.floating)
                          or np.issubdtype(dtype, np.integer))
            # Skip columns that were already requested explicitly so the
            # same tag is not listed twice.
            if is_numeric and column not in properties:
                properties.append(column)

    # The molecule column and the title column are never exported as tags.
    properties = [p for p in properties if p != molColName and p != idName]

    writer = SDWriter(out)
    try:
        writer.SetProps(properties)
        for index, row in df.iterrows():
            # Deep-copy so the molecule stored in the dataframe is untouched.
            mol = copy.deepcopy(row[molColName])
            # Clear every property reported by GetPropNames() on the copy
            # before the requested ones are set.
            for prop in mol.GetPropNames():
                mol.ClearProp(prop)

            if idName is not None:
                if idName == 'RowID':
                    mol.SetProp('_Name', str(index))
                else:
                    mol.SetProp('_Name', str(row[idName]))

            for p in properties:
                cell_value = row[p]
                # Make sure float does not get formatted in E notation
                if np.issubdtype(type(cell_value), np.floating):
                    # "f" will show 7.0 as 7.000000
                    text = '{:f}'.format(cell_value).rstrip('0')
                    if text.endswith('.'):
                        # keep one decimal digit instead of a dangling "7."
                        text += '0'
                    mol.SetProp(p, text)
                else:
                    mol.SetProp(p, str(cell_value))
            writer.write(mol)
    finally:
        # Close the writer even when a row fails.
        writer.close()
Esempio n. 3
0
    def writeSdf(self, sdf_name, fields=None):
        """
        Write the stored molecules to an sdf file, optionally selecting
        which fields are written.

        Parameters
        ----------
        sdf_name: str
            The output sdf filename
        fields: list
            A list of the fields to write, handed to ``SDWriter.SetProps``.
            If None, ``SetProps`` is not called and the writer's default
            selection applies.

        Raises
        ------
        TypeError
            If ``fields`` is given but is not a list.
        """
        # Validate first so an invalid call fails before a writer is created.
        if fields is not None and not isinstance(fields, list):
            raise TypeError(
                f"The fields argument {type(fields)} should be a list")

        from rdkit.Chem import SDWriter

        writer = SDWriter(sdf_name)
        try:
            if fields is not None:
                writer.SetProps(fields)

            for m in self._mols:
                writer.write(m._mol)
        finally:
            # Always close the writer, also when a molecule fails to write.
            writer.close()
Esempio n. 4
0
def WriteSDF(df,
             out,
             molColumn,
             properties=None,
             allNumeric=False,
             titleColumn=None):
    '''Write an SD file for the molecules in the dataframe.

    One molecule is written per dataframe row. Dataframe columns can be
    exported as SDF tags if specified in the "properties" list.

    Arguments:
      df: dataframe holding the molecules and the columns to export.
      out: destination handed to SDWriter.
      molColumn: name of the column holding the molecules.
      properties: iterable of column names to export as SDF tags. The
        caller's object is never modified.
      allNumeric: if true, every column with a floating-point or integer
        dtype is added to the exported properties.
      titleColumn: column used as molecule title ("_Name"). It can be set to
        "RowID" to use the dataframe row key as title.
    '''
    # Work on a private copy: the list is extended and filtered below and
    # the caller's list must not change as a side effect.
    properties = [] if properties is None else list(properties)
    if allNumeric:
        for column in df.dtypes.keys():
            dtype = df.dtypes[column]
            # Compare against the abstract NumPy kinds: the builtin float/int
            # only match the default-width dtypes on current NumPy releases.
            is_numeric = (np.issubdtype(dtype, np.floating)
                          or np.issubdtype(dtype, np.integer))
            # Skip columns that were already requested explicitly so the
            # same tag is not listed twice.
            if is_numeric and column not in properties:
                properties.append(column)

    # The molecule column and the title column are never exported as tags.
    properties = [p for p in properties
                  if p != molColumn and p != titleColumn]

    writer = SDWriter(out)
    try:
        writer.SetProps(properties)
        for index, row in df.iterrows():
            # Deep-copy so the molecule stored in the dataframe is untouched.
            mol = copy.deepcopy(row[molColumn])
            if titleColumn is not None:
                if titleColumn == 'RowID':
                    mol.SetProp('_Name', str(index))
                else:
                    # Convert to str like the RowID branch, so a non-string
                    # title value is handled the same way.
                    mol.SetProp('_Name', str(row[titleColumn]))
            for p in properties:
                mol.SetProp(p, str(row[p]))
            writer.write(mol)
    finally:
        # Close the writer even when a row fails.
        writer.close()