def WriteSDF(df, out, molColName='ROMol', idName=None, properties=None, allNumeric=False):
    '''Write an SD file for the molecules in the dataframe.

    Dataframe columns can be exported as SDF tags if specified in the
    "properties" list. "properties=list(df.columns)" would export all columns.
    The "allNumeric" flag allows to automatically include all numeric columns
    in the output. User has to make sure that correct data type is assigned to
    column. "idName" can be used to select a column to serve as molecule title.
    It can be set to "RowID" to use the dataframe row key as title.

    "out" is passed to SDWriter. If it is a string ending in ".gz"
    (case-insensitive) a gzip stream is opened for it and closed again when
    writing finishes, including when writing fails part-way.
    The list passed as "properties" is copied and never modified.
    '''
    # Private copy: the list is extended and pruned below.
    properties = [] if properties is None else list(properties)
    if allNumeric:
        # Test against NumPy's abstract scalar types. The builtin float/int
        # are coerced to one concrete width by current NumPy releases, which
        # would leave e.g. float32 or int32 columns out.
        properties.extend(
            col for col in df.dtypes.keys()
            if (np.issubdtype(df.dtypes[col], np.floating)
                or np.issubdtype(df.dtypes[col], np.integer)))
    # The molecule and the title column are not exported as SDF tags.
    if molColName in properties:
        properties.remove(molColName)
    if idName in properties:
        properties.remove(idName)

    close = None
    if isinstance(out, string_types):
        if out.lower().endswith(".gz"):
            import gzip
            import sys
            # NOTE(review): SDWriter is expected to need a text-mode stream on
            # Python 3 (upstream RDKit opens this with "wt"); Python 2 keeps
            # the original binary mode.
            gz_mode = "wt" if sys.version_info[0] >= 3 else "wb"
            out = gzip.open(out, gz_mode)
            close = out.close

    try:
        writer = SDWriter(out)
        try:
            writer.SetProps(properties)
            for row_id, row in df.iterrows():
                # make a local copy I can modify
                mol = Chem.Mol(row[molColName])
                if idName is not None:
                    title = row_id if idName == 'RowID' else row[idName]
                    mol.SetProp('_Name', str(title))
                for prop in properties:
                    cell_value = row[prop]
                    # Make sure float does not get formatted in E notation
                    if isinstance(cell_value, (float, np.floating)):
                        # "f" will show 7.0 as 7.000000
                        text = '{:f}'.format(cell_value).rstrip("0")
                        if text[-1] == ".":
                            # put the "0" back on if it's something like "7."
                            text += "0"
                        mol.SetProp(prop, text)
                    else:
                        mol.SetProp(prop, str(cell_value))
                writer.write(mol)
        finally:
            # Close the writer even if a row failed.
            writer.close()
    finally:
        # Close the gzip stream this function opened itself.
        if close is not None:
            close()
def WriteSDF(df, out, molColName='ROMol', idName=None, properties=None, allNumeric=False):
    '''Write an SD file for the molecules in the dataframe.

    Dataframe columns can be exported as SDF tags if specified in the
    "properties" list. "properties=list(df.columns)" would export all columns.
    The "allNumeric" flag allows to automatically include all numeric columns
    in the output. User has to make sure that correct data type is assigned to
    column. "idName" can be used to select a column to serve as molecule title.
    It can be set to "RowID" to use the dataframe row key as title.

    "out" is passed to SDWriter unchanged. Properties already stored on a
    molecule are cleared from the written copy, so only the title and the
    selected dataframe columns are set on it.
    The list passed as "properties" is copied and never modified.
    '''
    # Private copy: the list is extended and pruned below, and the caller's
    # list must not change as a side effect.
    properties = [] if properties is None else list(properties)
    if allNumeric:
        # Test against NumPy's abstract scalar types. The builtin float/int
        # are coerced to one concrete width by current NumPy releases, which
        # would leave e.g. float32 or int32 columns out.
        properties.extend(
            col for col in df.dtypes.keys()
            if (np.issubdtype(df.dtypes[col], np.floating)
                or np.issubdtype(df.dtypes[col], np.integer)))
    # The molecule and the title column are not exported as SDF tags.
    if molColName in properties:
        properties.remove(molColName)
    if idName in properties:
        properties.remove(idName)

    writer = SDWriter(out)
    try:
        writer.SetProps(properties)
        for row_id, row in df.iterrows():
            # Deep copy so the molecule held in the dataframe stays untouched.
            mol = copy.deepcopy(row[molColName])
            # Remove embedded props
            for embedded in mol.GetPropNames():
                mol.ClearProp(embedded)
            if idName is not None:
                title = row_id if idName == 'RowID' else row[idName]
                mol.SetProp('_Name', str(title))
            for prop in properties:
                cell_value = row[prop]
                # Make sure float does not get formatted in E notation
                if isinstance(cell_value, (float, np.floating)):
                    text = '{:f}'.format(cell_value).rstrip('0')
                    if text[-1] == '.':
                        # Whole numbers strip down to "7."; restore one zero.
                        text += '0'
                    mol.SetProp(prop, text)
                else:
                    mol.SetProp(prop, str(cell_value))
            writer.write(mol)
    finally:
        # Close the writer even if a row failed.
        writer.close()
def writeSdf(self, sdf_name, fields=None):
    """
    Write the stored molecules to an sdf file, optionally selecting which
    fields are written.

    Parameters
    ----------
    sdf_name: str
        The output sdf filename
    fields: list
        A list of the fields to write. If None, no selection is passed to
        the writer (per the original note, all fields are then saved)

    Raises
    ------
    TypeError
        If ``fields`` is given but is not a list. This is checked before the
        output file is opened.
    """
    # Validate first so a bad argument never creates or truncates the file.
    if fields is not None and not isinstance(fields, list):
        raise TypeError(
            f"The fields argument {type(fields)} should be a list")

    from rdkit.Chem import SDWriter

    writer = SDWriter(sdf_name)
    try:
        if fields is not None:
            writer.SetProps(fields)
        for m in self._mols:
            writer.write(m._mol)
    finally:
        # Explicit close so the output is finalized even if a write fails.
        writer.close()
def WriteSDF(df, out, molColumn, properties=None, allNumeric=False, titleColumn=None):
    '''Write an SD file for the molecules in the dataframe.

    Dataframe columns can be exported as SDF tags if specified in the
    "properties" list. The "allNumeric" flag allows to automatically include
    all numeric columns in the output. "titleColumn" can be used to select a
    column to serve as molecule title. It can be set to "RowID" to use the
    dataframe row key as title.

    "out" is passed to SDWriter unchanged. Title and property values are
    converted with str() before being set on the molecule.
    The list passed as "properties" is copied and never modified.
    '''
    # Private copy: the list is extended and pruned below, and the caller's
    # list must not change as a side effect.
    properties = [] if properties is None else list(properties)
    if allNumeric:
        # Test against NumPy's abstract scalar types. The builtin float/int
        # are coerced to one concrete width by current NumPy releases, which
        # would leave e.g. float32 or int32 columns out.
        properties.extend(
            col for col in df.dtypes.keys()
            if (np.issubdtype(df.dtypes[col], np.floating)
                or np.issubdtype(df.dtypes[col], np.integer)))
    # The molecule and the title column are not exported as SDF tags.
    if molColumn in properties:
        properties.remove(molColumn)
    if titleColumn in properties:
        properties.remove(titleColumn)

    writer = SDWriter(out)
    try:
        writer.SetProps(properties)
        for row_id, row in df.iterrows():
            # Deep copy so the molecule held in the dataframe stays untouched.
            mol = copy.deepcopy(row[molColumn])
            if titleColumn is not None:
                title = row_id if titleColumn == 'RowID' else row[titleColumn]
                # str() so numeric or other non-string titles are accepted,
                # matching how the row key is already handled.
                mol.SetProp('_Name', str(title))
            for prop in properties:
                mol.SetProp(prop, str(row[prop]))
            writer.write(mol)
    finally:
        # Close the writer even if a row failed.
        writer.close()