コード例 #1
0
 def _lookup(self, file_path):
     """Return the SMILES string for the first molecule in an SDF file.

     Args:
         file_path: Path of the ligand SDF file, passed straight to
             ``fo.read_sdf_to_mol``.

     Returns:
         The result of ``Chem.MolToSmiles`` for the first molecule read.
     """
     molecules = fo.read_sdf_to_mol(
         file_path, sanitize=False, add_hs=False, remove_hs=True)
     # A pdbbind ligand SDF file is assumed to hold a single ligand, so only
     # the first entry is used.
     first_ligand = molecules[0]
     return Chem.MolToSmiles(first_ligand)
コード例 #2
0
ファイル: datasets.py プロジェクト: sailfish009/atom3d
 def __getitem__(self, index: int):
     """Build the dataset item for the SDF file at position ``index``.

     Args:
         index: Position in ``self._file_list``; must satisfy
             ``0 <= index < self._num_examples``.

     Returns:
         A dict with keys ``'atoms'``, ``'id'`` and ``'file_path'``, plus
         ``'bonds'`` when ``self._read_bonds`` is truthy. If
         ``self._transform`` is truthy, its return value for that dict is
         returned instead.

     Raises:
         IndexError: If ``index`` is outside the valid range (negative
             indices are rejected as well).
     """
     if index < 0 or index >= self._num_examples:
         raise IndexError(index)

     path_str = str(self._file_list[index])
     # Both readers below are called with the same parsing flags.
     sdf_options = dict(sanitize=False, add_hs=False, remove_hs=False)

     # Read the structure and assemble the base item (no bonds yet).
     structure = fo.read_sdf(path_str, **sdf_options)
     item = {
         'atoms': fo.bp_to_df(structure),
         'id': structure.id,
         'file_path': path_str,
     }

     # Bonds are optional; they come from the first molecule in the file.
     if self._read_bonds:
         first_mol = fo.read_sdf_to_mol(path_str, **sdf_options)[0]
         item['bonds'] = fo.get_bonds_list_from_mol(first_mol)

     if self._transform:
         item = self._transform(item)
     return item
コード例 #3
0
    def __init__(self, csv_file, sdf_file, name='molecules'):
        """Load labels from a CSV file and molecules from an SDF file.

        Molecules that come back as ``None`` from the SDF reader are
        reported on stdout and left out of the per-molecule lists.

        Args:
            csv_file (str): CSV file with label data.
            sdf_file (str): SDF file with coordinates.
            name (str, opt.): Name of the dataset. Default: 'molecules'.
                Note: this argument is not used by this method.

        Raises:
            AssertionError: If the number of molecules in the SDF file
                differs from the length of the first CSV column.

        """
        table = pd.read_csv(csv_file)
        columns = table.keys()
        self.raw_data = [table[key] for key in columns]
        self.raw_mols = dt.read_sdf_to_mol(sdf_file, sanitize=False)

        # Sanity check: both files must describe the same number of molecules.
        assert len(self.raw_mols) == len(self.raw_data[0])

        # Per-molecule lists; entries are appended only for readable molecules.
        self.num_atoms, self.charges, self.positions = [], [], []
        self.index, self.data = [], []
        self.data_keys = list(columns)

        for position, mol in enumerate(self.raw_mols):
            mol_number = position + 1  # 1-based number of the molecule

            if mol is None:
                print('Molecule', mol_number, 'could not be processed.')
                continue

            # Compute everything first so nothing is appended if a step fails.
            atomic_numbers = np.array(
                [atom.GetAtomicNum() for atom in mol.GetAtoms()])
            coordinates = dt.get_coordinates_of_conformer(mol)

            self.num_atoms.append(mol.GetNumAtoms())
            self.index.append(mol_number)
            self.charges.append(atomic_numbers)
            self.positions.append(coordinates)
            # One label row: the value of every CSV column for this molecule.
            self.data.append([column[position] for column in self.raw_data])
コード例 #4
0
 def process(self):
     """Convert raw ligand/pocket files into saved ``Data`` objects.

     Reads ``pdbbind_refined_set_labels.csv`` from ``self.root`` and walks
     ``self.raw_paths``. A path containing ``'_ligand'`` refreshes the
     current ligand graph; a path containing ``'_pocket'`` is combined with
     that ligand graph and written to ``self.processed_dir`` as
     ``data_<i>.pt``, where ``i`` counts the saved items from 0. Any other
     path is skipped.

     NOTE: the ligand graph is only assigned in the ligand branch, so each
     pocket is paired with whichever ligand was read most recently. This
     relies on ``self.raw_paths`` listing a ligand before its pocket.
     """
     labels = pd.read_csv(
         os.path.join(self.root, 'pdbbind_refined_set_labels.csv'))
     saved_count = 0
     for path in self.raw_paths:
         code = fi.get_pdb_code(path)
         # The label is looked up for every path, including skipped ones.
         y = torch.FloatTensor([get_label(code, labels)])

         if '_ligand' in path:
             ligand = dt.read_sdf_to_mol(path, add_hs=True)[0]
             mol_graph = graph.mol_to_graph(ligand)
             continue

         if '_pocket' not in path:
             continue

         pocket_df = dt.bp_to_df(dt.read_any(path, name=code))
         prot_graph = graph.prot_df_to_graph(pocket_df)
         node_feats, edge_index, edge_feats, pos = graph.combine_graphs(
             prot_graph, mol_graph, edges_between=True)

         data = Data(node_feats, edge_index, edge_feats, y=y, pos=pos)
         data.pdb = code
         target = os.path.join(self.processed_dir, f'data_{saved_count}.pt')
         torch.save(data, target)
         saved_count += 1
コード例 #5
0
    def __init__(self, csv_file, sdf_file, name='molecules'):
        """Load labels from a CSV file and molecule dataframes from an SDF file.

        Molecules that come back as ``None`` from the SDF reader are
        reported on stdout and left out of the per-molecule lists.

        Args:
            csv_file (str): CSV file with label data; a ``mol_id`` column
                is read for every molecule that is kept.
            sdf_file (str): SDF file with coordinates.
            name (str, opt.): Name of the dataset. Default: 'molecules'.
                Note: this argument is not used by this method.

        Raises:
            AssertionError: If the number of molecules in the SDF file
                differs from the length of the first CSV column.

        """
        table = pd.read_csv(csv_file)
        columns = table.keys()
        self.raw_data = [table[key] for key in columns]
        self.raw_mols = dt.read_sdf_to_mol(sdf_file, sanitize=False)

        # Sanity check: both files must describe the same number of molecules.
        assert len(self.raw_mols) == len(self.raw_data[0])

        # Per-molecule lists; entries are appended only for readable molecules.
        self.index, self.data = [], []
        self.data_keys = list(columns)
        self.mol_dfs = []

        for position, mol in enumerate(self.raw_mols):
            mol_number = position + 1  # 1-based number of the molecule

            if mol is None:
                print('Molecule', mol_number, 'could not be processed.')
                continue

            self.index.append(mol_number)
            # One label row: the value of every CSV column for this molecule.
            self.data.append([column[position] for column in self.raw_data])
            mol_frame = dt.mol_to_df(
                mol, add_hs=False, structure=table['mol_id'][position])
            self.mol_dfs.append(mol_frame)
コード例 #6
0
import numpy as np
import pandas as pd

import atom3d.util.formats as dt

# Input and output locations for the QM9 raw data.
in_dir_name = '../../data/qm9/raw'
csv_file = in_dir_name + '/gdb9.sdf.csv'
sdf_file = in_dir_name + '/gdb9.sdf'
out_file = in_dir_name + '/gdb9_with_cv_atom.csv'

df = pd.read_csv(csv_file)
raw_data = [df[col] for col in df.keys()]
raw_mols = dt.read_sdf_to_mol(sdf_file, sanitize=False)

# The atom counts are combined row-by-row with the CSV below, so both files
# must describe the same number of molecules.
if len(raw_mols) != len(df):
    raise ValueError(
        'Number of molecules in SDF file ({}) does not match number of '
        'rows in CSV file ({}).'.format(len(raw_mols), len(df)))

# Collect exactly one atom count per molecule. A molecule that could not be
# read gets NaN as a placeholder so the array stays aligned with the rows of
# `df`; skipping it would shorten the array and break the subtraction below.
raw_num_atoms = []
for im, m in enumerate(raw_mols):
    if m is None:
        print('Molecule', im + 1, 'could not be processed.')
        raw_num_atoms.append(np.nan)
        continue
    raw_num_atoms.append(m.GetNumAtoms())
raw_num_atoms = np.array(raw_num_atoms, dtype=float)

# Subtract a fixed contribution of 2.981 per atom from `cv`; rows of
# unreadable molecules become NaN.
# NOTE(review): the origin and units of 2.981 are not visible here - confirm.
df['cv_atom'] = df['cv'] - (raw_num_atoms * 2.981)

print('cv:')
print(' mean:', np.mean(df['cv']))
print(' sdev:', np.std(df['cv']))
print('cv_atom:')
print(' mean:', np.mean(df['cv_atom']))