コード例 #1
0
def test_rmsd():
    """Check ``PandasMol2.rmsd`` on the two 1b5e test structures.

    The RMSD is computed once with ``heavy_only=False`` and once with the
    method's default arguments, and compared against fixed reference values.
    """
    first, second = (
        PandasMol2().read_mol2(os.path.join(this_dir, 'data', file_name))
        for file_name in ('1b5e_1.mol2', '1b5e_2.mol2')
    )

    rmsd_all_atoms = first.rmsd(first.df, second.df, heavy_only=False)
    assert rmsd_all_atoms == 1.5523

    rmsd_default = first.rmsd(first.df, second.df)
    assert rmsd_default == 1.1609
コード例 #2
0
def data_processor(mol2s):
    """Match atoms between a pair of mol2 entries.

    Parameters
    ----------
    mol2s : sequence
        Two entries, each indexable as ``(mol2_code, mol2_lines)``. Entry 0 is
        loaded as the "d" molecule and entry 1 as the "q" molecule.

    Returns
    -------
    tuple
        ``(code of entry 0, code of entry 1, atoms, charges)``; ``atoms`` and
        ``charges`` are the two values returned by ``get_atom_matches``.
    """
    d_entry = mol2s[0]
    d_pdmol = PandasMol2()
    d_pdmol.read_mol2_from_list(mol2_code=d_entry[0], mol2_lines=d_entry[1])

    q_entry = mol2s[1]
    q_pdmol = PandasMol2()
    q_pdmol.read_mol2_from_list(mol2_code=q_entry[0], mol2_lines=q_entry[1])

    # The query molecule is passed first, the "d" molecule second.
    matched = get_atom_matches(q_pdmol, d_pdmol)
    atoms, charges = matched
    return d_entry[0], q_entry[0], atoms, charges
コード例 #3
0
def data_processor_gz(mol2s_gz):
    """Match atoms between a pair of mol2 entries whose codes are bytes.

    Parameters
    ----------
    mol2s_gz : sequence
        Two entries, each indexable as ``(mol2_code, mol2_lines)``. The codes
        must support ``.decode('utf-8')``. Entry 0 is loaded as the "d"
        molecule and entry 1 as the "q" molecule.

    Returns
    -------
    tuple
        ``(decoded code of entry 0, decoded code of entry 1, atoms, charges)``;
        ``atoms`` and ``charges`` come from ``get_atom_matches``.
    """
    d_entry = mol2s_gz[0]
    d_pdmol = PandasMol2()
    d_pdmol.read_mol2_from_list(mol2_code=d_entry[0], mol2_lines=d_entry[1])

    q_entry = mol2s_gz[1]
    q_pdmol = PandasMol2()
    q_pdmol.read_mol2_from_list(mol2_code=q_entry[0], mol2_lines=q_entry[1])

    atoms, charges = get_atom_matches(q_pdmol, d_pdmol)

    # The codes are passed to the reader undecoded and only decoded here.
    d_code = d_entry[0].decode('utf-8')
    q_code = q_entry[0].decode('utf-8')
    return (d_code, q_code, atoms, charges)
コード例 #4
0
    def SYBYL(self, atomtype_set=['Al', 'B', 'Br', 'C.1', 'C.2', 'C.3', 'C.ar', 'C.cat', 'Ca', 'Cl', 'F', 'H', 'Li',
                                  'Mg', 'N.1', 'N.2', 'N.3', 'N.4', 'N.am', 'N.ar', 'N.pl3', 'Na', 'O.2', 'O.3',
                                  'O.co2', 'P.3', 'S.2', 'S.3', 'S.O2', 'S.O', 'Si', 'Zn']):
        '''
        Convert ``self.ls_smiles`` into an array of SYBYL atom-type counts.

        Each SMILES string is converted to mol2 text with Open Babel (after
        adding hydrogens), written to ``molecule.mol2`` in the current working
        directory, re-read with ``PandasMol2`` and its ``atom_type`` column is
        counted.

        Parameters
        ----------
        atomtype_set : list of str
            Atom types to count; the position in this list is the column index
            in the result. The default list is only read, never mutated.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(len(self.ls_smiles), len(atomtype_set))``. A row
            is left at zero (or partially filled) when processing its SMILES
            raised an exception, including an atom type that is not in
            ``atomtype_set``.
        '''
        atomtype_to_int = {a: i for i, a in enumerate(atomtype_set)}
        array_fp = np.zeros((len(self.ls_smiles), len(atomtype_set)))

        for i, smi in enumerate(self.ls_smiles):
            try:
                obconversion = openbabel.OBConversion()
                obconversion.SetInAndOutFormats("smi", "mol2")
                mol = openbabel.OBMol()
                obconversion.ReadString(mol, smi)  # parse the SMILES string
                mol.AddHydrogens()
                output_mol2 = obconversion.WriteString(mol)  # SMILES -> mol2 text
                # biopandas is given a path, so the mol2 text goes through a
                # scratch file that is overwritten for every molecule.
                with open("molecule.mol2", "w+") as file:
                    file.write(output_mol2)
                molecule_mol2 = PandasMol2().read_mol2("molecule.mol2")
                # Count once instead of once per atom type.
                type_counts = molecule_mol2.df['atom_type'].value_counts()
                for atomtype in type_counts.index:
                    array_fp[i, atomtype_to_int[atomtype]] = type_counts[atomtype]
            except Exception:
                # Best effort: skip molecules that cannot be processed, but do
                # not swallow KeyboardInterrupt/SystemExit like a bare except.
                continue
        return array_fp
コード例 #5
0
def get_mol_feature(mol2file, agl_class):
    """Build the flat feature vector for every ordered pair of elements.

    Parameters
    ----------
    mol2file : str
        Path of the mol2 file to read with ``PandasMol2``.
    agl_class : object
        Must provide ``graph_features(cloudpoint1, cloudpoint2, ele1, ele2)``.
        It is called once for every ordered element pair for which both
        elements have at least one atom.

    Returns
    -------
    numpy.ndarray
        1-D array holding the features of all 10 x 10 element pairs, in
        row-major pair order. Pairs with a missing element contribute nine
        zeros.
    """
    # get data
    solvation = PandasMol2().read_mol2(mol2file).df

    solvation_e = ['H', 'C', 'N', 'O', 'F', 'P', 'S', 'Cl', 'Br', 'I']

    # The regex selection depends only on the element, so evaluate it once
    # per element rather than twice for each of the 100 pairs.
    atom_names = solvation['atom_name']
    element_masks = {
        ele: atom_names.str.contains('^' + ele + '[0-9]*$')
        for ele in solvation_e
    }
    xyz = solvation[['x', 'y', 'z']]

    # Start from an empty float array so the result dtype matches what
    # repeated ``np.append`` onto a float array would give.
    pair_features = [np.array([], dtype=float)]
    for ele1 in solvation_e:
        for ele2 in solvation_e:
            # Fresh arrays for every call, so graph_features never receives
            # an array it has already seen.
            cloudpoint1 = xyz[element_masks[ele1]].values
            cloudpoint2 = xyz[element_masks[ele2]].values

            if cloudpoint1.shape[0] == 0 or cloudpoint2.shape[0] == 0:
                # agl_features have 9 features
                agl_features = np.zeros((9, ))
            else:
                # each pair atoms feature
                agl_features = agl_class.graph_features(cloudpoint1, cloudpoint2, ele1, ele2)

            # np.append flattened its argument; np.ravel keeps that behavior.
            pair_features.append(np.ravel(agl_features))

    # One concatenation instead of re-allocating the result 100 times.
    return np.concatenate(pair_features)
コード例 #6
0
def extract_centerdistance_data(mol, proj_direction):
    '''Return each atom's distance to the origin after principal-axis alignment.

    Parameters
    ----------
    mol : str
        Path of the mol2 file to read with ``PandasMol2``.
    proj_direction
        Passed through unchanged to ``alignment``.

    Returns
    -------
    dict
        Maps ``atom_id`` to the Euclidean norm of the atom's transformed
        ``(x, y, z)`` coordinates.

    Notes
    -----
    The previous implementation iterated over rows of
    ``[atom_id, x, y, z]`` and therefore computed
    ``sqrt(atom_id**2 + x**2 + y**2)``; the distance now uses x, y and z.
    '''
    # Kept for callers that rely on this global pandas option being set.
    pd.options.mode.chained_assignment = None
    mol2 = PandasMol2().read_mol2(mol)
    # Work on an explicit copy so the coordinate columns can be overwritten.
    atoms = mol2.df[['atom_id', 'x', 'y', 'z']].copy()

    # Aligning to principal axes so that origin is the center of pocket
    trans_coords = alignment(atoms, proj_direction)
    atoms['x'] = trans_coords[:, 0]
    atoms['y'] = trans_coords[:, 1]
    atoms['z'] = trans_coords[:, 2]

    atomid_list = atoms['atom_id'].tolist()
    # Select only the coordinate columns so atom_id cannot leak into the norm.
    coordinate_list = atoms[['x', 'y', 'z']].values.tolist()

    center_dist_list = [
        (x ** 2 + y ** 2 + z ** 2) ** .5 for x, y, z in coordinate_list
    ]

    return dict(zip(atomid_list, center_dist_list))
コード例 #7
0
    def parseMol2(self):
        """Parse the ligand file into ``self.lig_data``, once per instance.

        If ``self.lig_file`` does not end in ``mol2`` it is first converted
        with ``self._format_convert`` to ``<lig_file>.mol2`` and
        ``self.lig_file`` is updated. When ``PandasMol2`` raises ``ValueError``
        on the mol2 file, the file is converted to a temporary PDB file and
        loaded through ``mt.load_pdb`` instead.

        Returns
        -------
        self or None
            ``self`` once the ligand has been parsed (or was already parsed);
            ``None`` when the ligand file does not exist, or when the PDB
            fallback conversion produced no file.
        """
        if self.mol2_parsed_:
            return self

        if self.lig_file.split(".")[-1] != "mol2":
            out_file = self.lig_file + ".mol2"
            self._format_convert(self.lig_file, out_file)
            self.lig_file = out_file

        if not os.path.exists(self.lig_file):
            return None

        try:
            self.lig = PandasMol2().read_mol2(self.lig_file)
        except ValueError:
            templ_ligfile = self.lig_file + "templ.pdb"
            self._format_convert(self.lig_file, templ_ligfile)
            if not os.path.exists(templ_ligfile):
                # Nothing could be parsed; report failure the same way as for
                # a missing ligand file instead of falling through to
                # ``self.lig.df`` on an unset or stale ``self.lig``.
                return None

            self.lig = mt.load_pdb(templ_ligfile)
            # Was ``self.lig.topolgy`` (typo), which raised AttributeError.
            table, _bonds = self.lig.topology.to_dataframe()
            self.lig_ele = list(table['element'])
            # First frame, scaled by 10 -- presumably nm -> Angstrom; confirm.
            self.coordinates_ = self.lig.xyz[0] * 10.0
            self.lig_data = table
            self.lig_data['x'] = self.coordinates_[:, 0]
            self.lig_data['y'] = self.coordinates_[:, 1]
            self.lig_data['z'] = self.coordinates_[:, 2]
            self.mol2_parsed_ = True
            os.remove(templ_ligfile)
            return self

        self.lig_data = self.lig.df
        self.get_element()
        self.get_coordinates()
        self.mol2_parsed_ = True

        return self
コード例 #8
0
def voronoi_atoms_coords(bs, bs_out=None, projection=miller, proDirct=None):
    """Align a mol2 structure and write its atom table as tab-separated text.

    Parameters
    ----------
    bs : str
        Path of the mol2 file to read with ``PandasMol2``.
    bs_out : str, optional
        Prefix that is string-concatenated with the output file name (include
        a trailing path separator to write into a directory). ``None`` is
        treated as an empty prefix, i.e. the file is written relative to the
        current working directory; previously ``None`` raised ``TypeError``.
    projection
        Unused by this function; kept for interface compatibility.
    proDirct
        Passed through unchanged to ``alignment``.

    Returns
    -------
    None
        The result is the file ``bs_out + <basename of bs up to the first
        '.'>``.
    """
    # Suppresses warning (global pandas option, kept for callers relying on it)
    pd.options.mode.chained_assignment = None
    filename = os.path.basename(bs)
    print(filename)

    # Read molecules in mol2 format
    mol2 = PandasMol2().read_mol2(bs)
    # Explicit copy: the columns are renamed and one is rewritten below.
    atoms = mol2.df[[
        'subst_id', 'subst_name', 'atom_type', 'atom_name', 'x', 'y', 'z'
    ]].copy()
    atoms.columns = [
        'res_id', 'residue_type', 'atom_type', 'atom_name', 'x', 'y', 'z'
    ]
    atoms['residue_type'] = atoms['residue_type'].apply(lambda x: x[0:3])

    # Align to principal axis and store the transformed coordinates on the
    # full mol2 table, which is what gets written out.
    trans_coords = alignment(atoms, proDirct)
    mol2.df['x'] = trans_coords[:, 0]
    mol2.df['y'] = trans_coords[:, 1]
    mol2.df['z'] = trans_coords[:, 2]

    filename_without_tail = filename.split('.')[0]
    out_prefix = '' if bs_out is None else bs_out

    mol2.df.to_csv(out_prefix + filename_without_tail,
                   float_format="%10.4f",
                   sep='\t',
                   index=False)

    return
コード例 #9
0
    def __read_mol(self, mol_path):
        """
        Read a mol2 file and turn it into a graph of atoms.

        The atom table is extended with a three-letter ``residue`` column (the
        first three characters of ``subst_name``), per-residue
        ``hydrophobicity`` and ``binding_probability`` values looked up in the
        instance tables of the same names, and ``distance_to_center``. Bonds
        come from ``self.bond_parser``. A message is printed when the table
        contains NaN values.

        Parameters
        ----------
        mol_path : str
            Path of the mol2 file.

        Returns
        -------
        The value returned by ``self.__form_graph(atoms, bonds,
        self.threshold)``.
        """
        # Copy the column selection so new columns are added to an
        # independent frame rather than to a slice of the parsed table.
        atoms = PandasMol2().read_mol2(mol_path).df[[
            'atom_id', 'subst_name', 'atom_type', 'atom_name', 'x', 'y', 'z',
            'charge'
        ]].copy()
        atoms['residue'] = atoms['subst_name'].apply(lambda x: x[0:3])
        atoms['hydrophobicity'] = atoms['residue'].apply(
            lambda x: self.hydrophobicity[x])
        atoms['binding_probability'] = atoms['residue'].apply(
            lambda x: self.binding_probability[x])
        atoms['distance_to_center'] = self.__compute_dist_to_center(
            atoms[['x', 'y', 'z']].to_numpy())
        # TODO: solvent-accessible surface area and sequence entropy columns
        # (from pop/profile files) are not populated yet.

        if atoms.isnull().values.any():
            print('invalid input data (containing nan):')
            print(mol_path)

        bonds = self.bond_parser(mol_path)

        return self.__form_graph(atoms, bonds, self.threshold)
コード例 #10
0
def _log_problem_file(log_path: str, pdb_file: str) -> np.ndarray:
    """Append *pdb_file* to *log_path* and return the ``(1, 13)`` zero placeholder."""
    with open(log_path, 'a') as outfile:
        outfile.write(pdb_file + '\n')
    return np.zeros((1, 13))


def transform_pdb_to_numpy(pdb_file: str,
                           experiment_type: str,
                           center: bool = False) -> np.ndarray:
    """
    Read a structure file into an array of coordinates plus one-hot atom types.

    adapted in part from dMaSIF – https://github.com/FreyrS/dMaSIF/blob/master/data_preprocessing/convert_pdb2npy.py

    Parameters
    ----------
    pdb_file : str
        Path of the input. Paths ending in ``mol2`` are read with
        ``PandasMol2``; everything else is read with ``PandasPdb`` (ATOM
        records only).
    experiment_type : str
        One of ``'pdbbind'`` or ``'scpdb'``; selects the log directory used
        for mol2 inputs.
    center : bool
        When true, subtract the mean coordinate from all coordinates.

    Returns
    -------
    numpy.ndarray
        ``(n_atoms, 3 + len(atom_label_to_num))`` array: xyz followed by the
        one-hot atom type. When the file cannot be processed, its path is
        appended to a problem-file log and ``np.zeros((1, 13))`` is returned.

    Raises
    ------
    AssertionError
        If ``experiment_type`` is not one of the accepted values.
    """
    assert experiment_type in ['pdbbind', 'scpdb']

    is_mol2 = pdb_file[-4:] == 'mol2'
    # NOTE(review): the two branches log to differently named files
    # ('problem_files.txt' vs 'problems_files.txt'); kept as-is because other
    # tooling may read them -- confirm whether that is intentional.
    if is_mol2:
        log_path = '../data/logs/' + experiment_type + '/problem_files.txt'
    else:
        log_path = '../data/logs/problems_files.txt'

    try:
        if is_mol2:
            df = PandasMol2().read_mol2(pdb_file).df
            coords = df[['x', 'y', 'z']].values
            atoms = df['atom_type'].values
        else:
            df = PandasPdb().read_pdb(pdb_file).df['ATOM']
            coords = df[['x_coord', 'y_coord', 'z_coord']].values
            atoms = df['element_symbol'].values

        # An empty first label marks a file without usable atom typing.
        if atoms[0] == '':
            return _log_problem_file(log_path, pdb_file)
        atoms = np.vectorize(get_element_symbols)(atoms)
    except Exception:
        # Best effort: any parsing problem is logged and replaced by the
        # placeholder. Unlike a bare ``except``, KeyboardInterrupt and
        # SystemExit still propagate.
        return _log_problem_file(log_path, pdb_file)

    types = np.vectorize(atom_label_to_num.__getitem__)(atoms)
    # One-hot encode the integer type of every atom.
    types_array = np.zeros((len(types), len(atom_label_to_num)))
    types_array[np.arange(len(types)), types] = 1.0

    if center:
        coords = coords - np.mean(coords, axis=0, keepdims=True)

    return np.concatenate((coords, types_array), axis=1)
コード例 #11
0
def get_coords(ac_mol2_file):
    """Return the atom coordinates of a mol2 file as a NumPy array.

    Parameters
    ----------
    ac_mol2_file : str
        Path of the mol2 file to read with ``PandasMol2``.

    Returns
    -------
    numpy.ndarray
        One ``[x, y, z]`` row per atom, in file order.
    """
    table = PandasMol2().read_mol2(ac_mol2_file).df
    return np.array([[row.x, row.y, row.z] for row in table.itertuples()])
コード例 #12
0
    def __init__(self, ligand_fn):
        """Load the ligand mol2 file and initialise the parse state.

        Parameters
        ----------
        ligand_fn : str
            Path of the ligand mol2 file, read immediately with ``PandasMol2``.
        """
        parsed = PandasMol2().read_mol2(ligand_fn)
        self.lig = parsed
        self.lig_data = parsed.df

        # Set to None/False here; nothing else is computed in the constructor.
        self.lig_ele, self.coordinates = None, None
        self.mol2_parsed_ = False
コード例 #13
0
def test_read_mol2_from_list():
    """Load the first molecule of the multi-mol2 fixture from its line list."""
    fixture = os.path.join(this_dir, 'data', '40_mol2_files.mol2')
    # split_multimol2 yields entries indexed as (code, lines); take the first.
    first_entry = next(split_multimol2(fixture))

    pdmol = PandasMol2().read_mol2_from_list(mol2_code=first_entry[0],
                                             mol2_lines=first_entry[1])

    n_rows, n_cols = 65, 9
    assert pdmol.df.shape == (n_rows, n_cols)
    assert pdmol.code == 'ZINC38611810'
コード例 #14
0
ファイル: bionoi.py プロジェクト: CSBG-LSU/BionoiNet
def extract_seq_entropy_data(profile, mol):
    '''Compute per-residue sequence entropy for the residues of a mol2 file.

    Every line of the profile file is split on whitespace into a residue
    letter followed by probabilities; the first line is then discarded. The
    entropy of a row is ``-sum(p * log2(p))`` with non-finite logarithms
    treated as 0. Residues are labelled ``<three-letter code><1-based row
    number>`` and matched against the ``subst_name`` values of the mol2 file.

    Parameters
    ----------
    profile : str
        Path of the ``.profile`` file.
    mol : str
        Path of the mol2 file.

    Returns
    -------
    dict
        Maps each matching ``subst_name`` to its entropy as a float.
    '''
    # Global pandas option, set for the benefit of the surrounding pipeline.
    pd.options.mode.chained_assignment = None
    site_residues = PandasMol2().read_mol2(mol).df['subst_name'].tolist()

    letters = []
    probabilities = []
    with open(profile) as handle:
        for raw_line in handle:
            fields = raw_line.split()
            letter = fields[0]
            row = [float(value) for value in fields[1:]]
            letters.append(letter)
            probabilities.append(row)

    # Drop the first parsed line from both lists.
    letters = letters[1:]
    probabilities = probabilities[1:]

    # Three-letter code followed by the 1-based position in the profile.
    labels = [
        amino_single_to_triple(letter) + str(position)
        for position, letter in enumerate(letters, start=1)
    ]

    with np.errstate(divide='ignore'):
        prob_array = np.asarray(probabilities)
        log_array = np.log2(prob_array)
        # log2(0) is -inf; treat those terms as contributing nothing.
        log_array[~np.isfinite(log_array)] = 0
        entropies = (np.sum(a=log_array * prob_array, axis=1) * -1).tolist()

    entropy_by_label = dict(zip(labels, entropies))
    return {
        name: float(entropy_by_label[name])
        for name in site_residues if name in entropy_by_label
    }
コード例 #15
0
def test_overwrite_df():
    """Assigning to ``PandasMol2.df`` must raise AttributeError with guidance."""
    pdmol = PandasMol2().read_mol2(
        os.path.join(this_dir, 'data', '1b5e_1.mol2'))

    expected_message = ('Please use `PandasMol2._df = ... `'
                        ' instead\nof `PandasMol2.df = ... `'
                        ' if you are sure that\nyou want'
                        ' to overwrite the `df` attribute.')

    def assign_filtered_frame():
        # Direct assignment to the public attribute is what should fail.
        not_hydrogen = (pdmol.df['atom_type'] != 'H')
        pdmol.df = pdmol.df[not_hydrogen]

    assert_raises(AttributeError, expected_message, assign_filtered_frame)
コード例 #16
0
def check_charge(filename, charge):
    """
    Print whether the net charge of a mol2 file equals the expected value.

    The per-atom ``charge`` column is summed and rounded to five decimals
    before it is compared with ``charge``.

    Parameters
    ----------
    filename : str
        Path of the mol2 file.
    charge : float
        Expected net charge.
    """
    net_charge = round(PandasMol2().read_mol2(filename)._df.charge.sum(), 5)
    if net_charge != charge:
        print('Check failed! The charge is: {:0.4f}'.format(net_charge))
        return
    print('Check passed!')
コード例 #17
0
def extract_charge_data(mol):
    '''Map every atom id of a mol2 file to its charge.

    Parameters
    ----------
    mol : str
        Path of the mol2 file to read with ``PandasMol2``.

    Returns
    -------
    dict
        ``{atom_id: charge}`` built from the ``atom_id`` and ``charge``
        columns.
    '''
    # Global pandas option, set for the benefit of the surrounding pipeline.
    pd.options.mode.chained_assignment = None
    table = PandasMol2().read_mol2(mol).df[['atom_id', 'charge']]

    charges = table['charge'].tolist()
    atom_ids = table['atom_id'].tolist()
    return dict(zip(atom_ids, charges))
コード例 #18
0
def test_read_mol2():
    """Read the multi-mol2 fixture from plain and gzipped files."""
    plain_path = os.path.join(this_dir, 'data', '40_mol2_files.mol2')
    gzip_path = os.path.join(this_dir, 'data', '40_mol2_files.mol2.gz')
    expected_columns = ['atom_id', 'atom_name', 'x', 'y', 'z',
                        'atom_type', 'subst_id', 'subst_name', 'charge']

    # Both inputs must give the same first molecule and metadata.
    for source in (plain_path, gzip_path):
        pdmol = PandasMol2().read_mol2(source)

        assert pdmol.df.shape == (65, 9)
        assert pdmol.code == 'ZINC38611810'
        assert expected_columns == list(pdmol.df.columns)
        assert len(pdmol.mol2_text) == 6469
        assert pdmol.mol2_path == source
コード例 #19
0
ファイル: dataframe.py プロジェクト: volkamerlab/opencadd
    def _from_mol2_text(cls, mol2_text, verbose=False):
        """
        Get structural data from mol2 text as DataFrame.

        The text is first parsed with the 10-column layout; if pandas reports
        that the data only has 9 columns, parsing is retried with the
        9-column layout.

        Parameters
        ----------
        mol2_text : str
            Mol2 file content from KLIFS database.
        verbose : bool
            Show only default columns (False) or additionally input-format specific columns (True).

        Returns
        -------
        pandas.DataFrame
            Structural data.

        Raises
        ------
        ValueError
            If no structural data could be loaded from the text, or for any
            other ``ValueError`` raised while parsing.
        """

        mol2_lines = mol2_text.split("\n")

        # Use biopandas to parse the mol2 format and return a DataFrame
        try:
            pmol = PandasMol2()
            try:
                mol2_df = pmol.read_mol2_from_list(
                    mol2_lines, "mol", columns=MOL2_COLUMNS["n_cols_10"]).df
            except ValueError as e:
                if str(e) != "10 columns passed, passed data had 9 columns":
                    raise
                mol2_df = pmol.read_mol2_from_list(
                    mol2_lines, "mol", columns=MOL2_COLUMNS["n_cols_9"]).df
        except UnboundLocalError as e:
            # The reader signals "no atom section found" through an unbound
            # local named ``first_idx``. The interpreter's wording for that
            # error changed in Python 3.11, so match on the variable name
            # instead of the full message.
            if "first_idx" not in str(e):
                raise
            raise ValueError(
                "No structural data could be loaded. Is the input text in mol2 format?"
            ) from e

        # Infer residue PDB ID and name from substructure name
        mol2_df = cls._split_mol2_subst_names(mol2_df)

        # Format DataFrame
        mol2_df = cls._format_dataframe(mol2_df, verbose)

        return mol2_df
コード例 #20
0
ファイル: bionoi.py プロジェクト: CSBG-LSU/BionoiNet
def extract_sasa_data(mol, pop):
    """Extract accessible-surface-area values for the residues of a binding site.

    Reads the ``.out`` file generated by POPSlegacy
    (https://github.com/Fraternalilab/POPSlegacy) and keeps the rows whose
    residue label (column 3 + column 5 of a 12-column line) occurs among the
    ``subst_name`` values of the mol2 file. The value kept is column 8.

    NaN values (e.g. ``-nan`` in the file) are replaced by the median of the
    remaining values. Previously the values were converted to float and then
    compared with the string ``'-nan'``, so no replacement ever happened and
    the NaNs took part in the median.

    Parameters
    ----------
    mol : str
        Path of the mol2 file.
    pop : str
        Path of the POPS ``.out`` file.

    Returns
    -------
    dict
        Maps the 1-based position of each matched row to its value as float.

    Raises
    ------
    statistics.StatisticsError
        If no row of the ``.out`` file matches a residue of the mol2 file.
    """
    import math  # local import: only needed for the NaN handling below

    # Global pandas option, set for the benefit of the surrounding pipeline.
    pd.options.mode.chained_assignment = None
    mol2 = PandasMol2().read_mol2(mol)
    # Only subst_name is needed for matching; a set makes the test O(1).
    site_residues = set(mol2.df['subst_name'].tolist())

    qsasa_list = []
    with open(pop) as popsa:
        for line in popsa:
            line_list = line.split()
            # Data rows have exactly 12 whitespace-separated fields.
            if len(line_list) == 12 and line_list[2] + line_list[4] in site_residues:
                qsasa_list.append(float(line_list[7]))

    if not qsasa_list:
        # Same exception type the median of an empty list raised before.
        raise statistics.StatisticsError(
            'no residue of the .out file matches the binding site')

    valid_values = [value for value in qsasa_list if not math.isnan(value)]
    if valid_values:
        median = statistics.median(valid_values)
        qsasa_list = [median if math.isnan(value) else value
                      for value in qsasa_list]

    return {
        position: float(value)
        for position, value in enumerate(qsasa_list, start=1)
    }
コード例 #21
0
ファイル: dataloader.py プロジェクト: Guannan1900/MolNet
 def __read_mol(self, mol_path, label):
     """
     Read a mol2 file and build the labelled atom graph.

     The three-letter residue name is the first three characters of
     ``subst_name``; hydrophobicity and binding probability are looked up
     per residue in the instance tables of the same names.
     """
     selected = PandasMol2().read_mol2(mol_path).df[[
         'atom_id', 'subst_name', 'atom_type', 'atom_name', 'x', 'y', 'z',
         'charge'
     ]]
     residue = selected['subst_name'].apply(lambda name: name[0:3])
     hydrophobicity = residue.apply(lambda res: self.hydrophobicity[res])
     binding_probability = residue.apply(
         lambda res: self.binding_probability[res])

     # Attach the derived columns, then keep only the graph features.
     enriched = selected.assign(residue=residue,
                                hydrophobicity=hydrophobicity,
                                binding_probability=binding_probability)
     feature_columns = [
         'atom_type', 'residue', 'x', 'y', 'z', 'charge', 'hydrophobicity',
         'binding_probability'
     ]
     return self.__form_graph(enriched[feature_columns], self.threshold,
                              label)
コード例 #22
0
def data_processor(mol2):
    """Return the molecule code if it passes the distance filter, else ''.

    Parameters
    ----------
    mol2 : sequence
        Entry indexed as ``(mol2_code, mol2_lines)``.

    Returns
    -------
    The value of ``mol2[0]`` when, for at least one reference atom, the
    distance test below succeeds; otherwise the empty string.
    """

    pdmol = PandasMol2().read_mol2_from_list(mol2_lines=mol2[1],
                                             mol2_code=mol2[0])

    # Coordinates of the atoms selected by the first selection expression.
    # NOTE(review): the SELECTION strings are evaluated with pd.eval;
    # presumably they refer to the local name ``pdmol``, so do not rename it
    # without checking. Make sure SELECTION never comes from untrusted input.
    coordinates = pdmol.df.loc[pd.eval(SELECTION[0]), ['x', 'y', 'z']].values

    # Restrict the molecule to the atoms matching the second selection; the
    # private ``_df`` is assigned directly.
    pdmol._df = pdmol._df[pd.eval(SELECTION[1])]

    for xyz in coordinates:

        # Presumably the distance from ``xyz`` to every remaining atom --
        # confirm against PandasMol2.distance.
        distances = pdmol.distance(xyz)

        # True when some distance is >= DISTANCE[0] and some (not necessarily
        # the same one) is <= DISTANCE[1].
        match = ((distances.values >= DISTANCE[0]).any()
                 and (distances.values <= DISTANCE[1]).any())

        if match:
            return mol2[0]

    return ''
コード例 #23
0
    def parseMol2(self):
        """Parse ``self.lig_file`` as mol2, falling back to a PDB conversion.

        When ``PandasMol2`` raises ``ValueError``, the file is converted to
        ``<lig_file>templ.pdb`` with ``self._format_convert``; if that file
        exists it is parsed with ``self.parsePDB`` and then removed.

        Returns
        -------
        self
        """
        mol2_readable = True
        try:
            self.lig = PandasMol2().read_mol2(self.lig_file)
        except ValueError:
            mol2_readable = False

        if not mol2_readable:
            print(
                "INFO: Warning, parse mol2 file error, converting to PDB instead ......"
            )
            fallback_pdb = self.lig_file + "templ.pdb"
            # convert mol2 format to pdb format
            self._format_convert(self.lig_file, fallback_pdb)

            if os.path.exists(fallback_pdb):
                self.parsePDB(fallback_pdb)
                os.remove(fallback_pdb)
                return self
            # No fallback file: execution continues with the mol2 path below.

        self.lig_data = self.lig.df
        self.get_element()
        self.get_coordinates()
        self.ligand_parsed_ = True

        return self
コード例 #24
0
ファイル: util.py プロジェクト: volkamerlab/klifs_utils
def _mol2_text_to_dataframe(mol2_text):
    """
    Get structural data from mol2 text.

    The text is parsed with ten columns (the standard nine plus
    ``backbone``); on ``ValueError`` it is parsed again with nine columns.

    Parameters
    ----------
    mol2_text : str
       Mol2 file content from KLIFS database.

    Returns
    -------
    pandas.DataFrame
        Structural data.
    """
    base_names = [
        'atom_id', 'atom_name', 'x', 'y', 'z', 'atom_type', 'subst_id',
        'subst_name', 'charge'
    ]
    base_types = [int, str, float, float, float, str, int, str, float]

    pmol = PandasMol2()

    try:
        # Ten-column layout: the nine base columns followed by 'backbone'.
        return pmol._construct_df(mol2_text.splitlines(True),
                                  col_names=base_names + ['backbone'],
                                  col_types=base_types + [str])
    except ValueError:
        return pmol._construct_df(mol2_text.splitlines(True),
                                  col_names=base_names,
                                  col_types=base_types)
コード例 #25
0
ファイル: util.py プロジェクト: volkamerlab/klifs_utils
def _mol2_file_to_dataframe(mol2_file):
    """
    Get structural data from mol2 file.

    The file is read with a ten-column layout (the standard nine plus
    ``backbone``); on ``ValueError`` it is read again with the reader's
    default columns.

    Parameters
    ----------
    mol2_file : pathlib.Path or str
       Path to mol2 file.

    Returns
    -------
    The value returned by ``PandasMol2.read_mol2``.
    NOTE(review): this was documented as a ``pandas.DataFrame``, but no
    ``.df`` is taken here, so the result looks like the reader object itself
    -- confirm against callers.
    """
    path_text = str(Path(mol2_file))

    ten_column_layout = dict(enumerate([
        ('atom_id', int),
        ('atom_name', str),
        ('x', float),
        ('y', float),
        ('z', float),
        ('atom_type', str),
        ('subst_id', int),
        ('subst_name', str),
        ('charge', float),
        ('backbone', str),
    ]))

    pmol = PandasMol2()
    try:
        loaded = pmol.read_mol2(path_text, columns=ten_column_layout)
    except ValueError:
        loaded = pmol.read_mol2(path_text)
    return loaded
コード例 #26
0
def load_mol2(path):
    """Load a mol2 file into the dictionary produced by ``generate_dict``.

    Parameters
    ----------
    path : str
        Path of the mol2 file; it is read with ``PandasMol2`` and again with
        ``pybel.readfile``.

    Returns
    -------
    The object returned by ``generate_dict`` for the x/y/z coordinates, atom
    names and residue names, with two extra entries: ``'charge'`` (the
    ``charge`` column) and ``'smarts'`` (the first item yielded by
    ``pybel.readfile('mol2', path)``).
    """
    frame_of = PandasMol2().read_mol2(path)

    coords = [frame_of.df[axis].values for axis in ('x', 'y', 'z')]
    atom_names = frame_of.df['atom_name'].values
    residue_names = frame_of.df['subst_name'].values
    partial_charge = frame_of.df['charge'].values
    first_pybel_entry = next(pybel.readfile('mol2', path))

    pro_dict = generate_dict(coords[0], coords[1], coords[2], atom_names,
                             residue_names)
    pro_dict['charge'] = partial_charge
    pro_dict['smarts'] = first_pybel_entry

    return pro_dict
コード例 #27
0
ファイル: ligands.py プロジェクト: zuzanna-mackiewicz/projekt
def ligands_reader():
    '''
    Parses selected MOL2 file with structures of previously docked ligands using BioPandas module.
    Lists all atoms from all ligands with their coordinates.

    A file dialog is opened in the ``files`` directory next to the parent of
    the current working directory. Previously the literal string ``'path'``
    was passed as the initial directory instead of that location.

    :return: one ``[atom_name, atom_id, x, y, z, model_number]`` list per
        atom, for all models in file order; model numbers start at 1
    :rtype: list of lists
    :raises FileNotFoundError: if the dialog is closed without a selection
    '''

    window = Tk()
    try:
        path = os.path.normpath(os.getcwd() + os.sep + os.pardir)
        path = os.path.join(path, 'files')

        ligands_path_string = filedialog.askopenfilename(
            initialdir=path,
            title="SELECT LIGANDS STRUCTURE:",
            filetypes=(("MOL2 files", "*.mol2"), ("all files", "*.*")))
    finally:
        # Always tear the helper window down, even if the dialog fails.
        window.destroy()

    if not ligands_path_string:
        raise FileNotFoundError('No ligands MOL2 file was selected.')

    ligands_data = []
    # split_multimol2 opens the file by path itself; no handle is needed here.
    for model_number, ligand in enumerate(
            split_multimol2(ligands_path_string), start=1):
        pmol = PandasMol2().read_mol2_from_list(mol2_lines=ligand[1],
                                                mol2_code=ligand[0])
        atom_coord = pmol.df[['atom_name', 'atom_id', 'x', 'y', 'z']]
        atom_coord = atom_coord.assign(column=model_number)

        # extend() avoids copying the accumulated list for every model.
        ligands_data.extend(atom_coord.values.tolist())

    return ligands_data
コード例 #28
0
def ECFP_SYBYL(row):
    """Fill *row* with SYBYL atom-type counts and Morgan fingerprint bits.

    The SMILES string in ``row["SMILES"]`` is converted to mol2 text with
    Open Babel (after adding hydrogens), written to ``molecule.mol2`` in the
    current working directory and re-read with ``PandasMol2`` to count its
    atom types. Counts of the known SYBYL types are stored under the type
    name. The Morgan fingerprint (module-level ``radius`` and ``nbits``) is
    stored bit by bit under the keys ``"0"`` ... ``str(nbits - 1)``.

    Parameters
    ----------
    row : mapping
        Must contain ``"SMILES"`` and support item assignment.

    Returns
    -------
    The updated ``row``, or ``None`` when anything went wrong (a message with
    the SMILES string is printed in that case).
    """
    sybyl_types = frozenset((
        'Al', 'B', 'Br', 'C.1', 'C.2', 'C.3', 'C.ar', 'C.cat', 'Ca', 'Cl',
        'F', 'H', 'Li', 'Mg', 'N.1', 'N.2', 'N.3', 'N.4', 'N.am', 'N.ar',
        'N.pl3', 'Na', 'O.2', 'O.3', 'O.co2', 'P.3', 'S.2', 'S.3', 'S.O2',
        'S.O', 'Si', 'Zn',
    ))
    try:
        obconversion = openbabel.OBConversion()
        obconversion.SetInAndOutFormats("smi", "mol2")
        mol = openbabel.OBMol()
        obconversion.ReadString(mol, row["SMILES"])  # parse the SMILES string
        mol.AddHydrogens()
        output_mol2 = obconversion.WriteString(mol)  # SMILES -> mol2 text
        # biopandas is given a path, so the mol2 text goes through a scratch
        # file; the context manager closes it even if the write fails.
        with open("molecule.mol2", "w+") as file:
            file.write(output_mol2)
        molecule_mol2 = PandasMol2().read_mol2("molecule.mol2")

        # Count once; atom types outside the known set are ignored.
        type_counts = molecule_mol2.df['atom_type'].value_counts()
        for element in type_counts.index:
            if element in sybyl_types:
                row[element] = type_counts[element]

        mol = Chem.MolFromSmiles(row['SMILES'])
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius,
                                                   nBits=nbits).ToBitString()
        for i in range(nbits):
            row[str(i)] = fp[i]
        return row
    except Exception:
        # Best effort per row; unlike a bare ``except`` this lets
        # KeyboardInterrupt/SystemExit propagate.
        print(row["SMILES"], "ECFP feature something is wrong!!")
        return None
コード例 #29
0
# SYBYL atom types that are recorded as descriptor columns; any other atom
# type reported for a molecule is ignored.
_SYBYL_ATOM_TYPES = frozenset((
    'Al', 'B', 'Br', 'C.1', 'C.2', 'C.3', 'C.ar', 'C.cat', 'Ca', 'Cl', 'F',
    'H', 'Li', 'Mg', 'N.1', 'N.2', 'N.3', 'N.4', 'N.am', 'N.ar', 'N.pl3',
    'Na', 'O.2', 'O.3', 'O.co2', 'P.3', 'S.2', 'S.3', 'S.O2', 'S.O', 'Si',
    'Zn',
))


def SYBYL(row):
    """Add per-SYBYL-atom-type counts for ``row["SMILES"]`` to ``row``.

    The SMILES string is converted to mol2 text with Open Babel (after
    adding hydrogens), written to ``molecule.mol2`` in the current working
    directory, and read back with ``PandasMol2``. For every atom type found
    in the molecule that is one of the known SYBYL types, ``row[<type>]`` is
    set to the number of atoms of that type. Types that do not occur in the
    molecule are left untouched on ``row``.

    Args:
        row: Mapping-like object supporting ``row["SMILES"]`` lookup and item
            assignment. It is modified in place.

    Returns:
        The same ``row`` object on success, or ``None`` if any step failed
        (a message containing the SMILES string is printed in that case).

    Note:
        The intermediate file name is fixed, so concurrent calls sharing a
        working directory would overwrite each other's ``molecule.mol2``.
    """
    try:
        obconversion = openbabel.OBConversion()
        obconversion.SetInAndOutFormats("smi", "mol2")
        mol = openbabel.OBMol()
        obconversion.ReadString(mol, row["SMILES"])
        mol.AddHydrogens()
        output_mol2 = obconversion.WriteString(mol)

        # The context manager closes the file even if the write fails.
        with open("molecule.mol2", "w+") as mol2_file:
            mol2_file.write(output_mol2)

        molecule_mol2 = PandasMol2().read_mol2("molecule.mol2")

        # Count once; iterate in value_counts order so that new keys are
        # added to ``row`` in the same order as before.
        type_counts = molecule_mol2.df['atom_type'].value_counts()
        for atom_type in type_counts.index:
            if atom_type in _SYBYL_ATOM_TYPES:
                row[atom_type] = type_counts[atom_type]
        return row
    except Exception:
        # Best-effort featurisation: report the failing molecule and signal
        # the failure with None. KeyboardInterrupt/SystemExit propagate.
        print(row["SMILES"], "SYBYL something is wrong!!")
        return None
コード例 #30
0
def voronoi_atoms(bs,
                  cmap,
                  colorby,
                  bs_out=None,
                  size=None,
                  dpi=None,
                  alpha=1,
                  save_fig=True,
                  projection=miller,
                  proDirct=None):
    """Draw a coloured 2D Voronoi diagram of the atoms in a mol2 file.

    The atoms are read with ``PandasMol2``, transformed with ``alignment``,
    projected to 2D with ``projection``, and each atom's Voronoi cell is
    drawn as a filled polygon on an axis-free matplotlib figure.

    Args:
        bs: Path of the mol2 file, passed to ``PandasMol2().read_mol2``.
        cmap: For ``colorby`` of ``"atom_type"`` or ``"residue_type"``, a
            mapping looked up as ``cmap[<type>]["color"]``. Not read when
            ``colorby == "residue_num"`` (a fresh palette is generated).
        colorby: One of ``"atom_type"``, ``"residue_type"`` or
            ``"residue_num"``.
        bs_out: Output image path; defaults to ``'out.jpg'``.
        size: Image edge length in pixels; defaults to 128.
        dpi: Figure resolution; defaults to 120.
        alpha: Polygon opacity; converted with ``float``.
        save_fig: When true, the figure is written to ``bs_out``.
        projection: Callable taking ``(x, y, z)`` and returning a pair whose
            items ``[0]`` and ``[1]`` are the projected coordinates.
        proDirct: Forwarded unchanged to ``alignment``.

    Returns:
        A tuple ``(atoms, vor, img)``: the atom table (with projected
        coordinates, polygons and colours added), the ``Voronoi`` object, and
        the value returned by ``fig_to_numpy`` for the rendered figure.

    Raises:
        ValueError: If ``colorby`` is not one of the supported values.
    """
    # Reject bad input before any file parsing or figure allocation.
    if colorby not in ("atom_type", "residue_type", "residue_num"):
        raise ValueError(
            "colorby must be 'atom_type', 'residue_type' or 'residue_num', "
            "got {!r}".format(colorby))

    # Suppresses warning
    # NOTE(review): this is a process-wide pandas setting that stays changed
    # after the call; kept because callers may rely on it.
    pd.options.mode.chained_assignment = None

    # Read molecules in mol2 format
    mol2 = PandasMol2().read_mol2(bs)
    atoms = mol2.df[[
        'subst_id', 'subst_name', 'atom_type', 'atom_name', 'x', 'y', 'z'
    ]]
    atoms.columns = [
        'res_id', 'residue_type', 'atom_type', 'atom_name', 'x', 'y', 'z'
    ]
    # Keep only the first three characters of the substructure name.
    atoms['residue_type'] = atoms['residue_type'].apply(lambda x: x[0:3])

    # Align to principal Axis
    trans_coords = alignment(atoms,
                             proDirct)  # get the transformation coordinate
    atoms['x'] = trans_coords[:, 0]
    atoms['y'] = trans_coords[:, 1]
    atoms['z'] = trans_coords[:, 2]

    # convert 3D  to 2D
    atoms["P(x)"] = atoms[['x', 'y', 'z']].apply(
        lambda coord: projection(coord.x, coord.y, coord.z)[0], axis=1)
    atoms["P(y)"] = atoms[['x', 'y', 'z']].apply(
        lambda coord: projection(coord.x, coord.y, coord.z)[1], axis=1)

    # setting output image size, labels off, set 120 dpi w x h
    size = 128 if size is None else size
    dpi = 120 if dpi is None else dpi

    # figsize is in inches, dpi is the resolution of the figure
    figure = plt.figure(figsize=(int(size) / int(dpi), int(size) / int(dpi)),
                        dpi=int(dpi))
    try:
        ax = figure.add_subplot(111)

        ax.axis('off')
        ax.tick_params(axis='both',
                       bottom=False,
                       left=False,
                       right=False,
                       labelleft=False,
                       labeltop=False,
                       labelright=False,
                       labelbottom=False)

        # Compute Voronoi tesselation
        vor = Voronoi(atoms[['P(x)', 'P(y)']])
        regions, vertices = voronoi_finite_polygons_2d(vor)
        polygons = [vertices[reg] for reg in regions]
        atoms.loc[:, 'polygons'] = polygons

        # Check alpha
        alpha = float(alpha)

        # Color by colorby
        if colorby == "residue_num":
            # One generated colour per distinct residue id.
            res_ids = set(atoms["res_id"])
            palette = k_different_colors(len(res_ids))
            cmap = dict(zip(res_ids, palette))
            colors = atoms["res_id"].apply(lambda x: cmap[x])
        else:
            colors = [cmap[_type]["color"] for _type in atoms[colorby]]
        atoms["color"] = colors

        for _, row in atoms.iterrows():
            colored_cell = matplotlib.patches.Polygon(row["polygons"],
                                                      facecolor=row['color'],
                                                      edgecolor=row['color'],
                                                      alpha=alpha,
                                                      linewidth=0.2)
            ax.add_patch(colored_cell)

        ax.set_xlim(vor.min_bound[0], vor.max_bound[0])
        ax.set_ylim(vor.min_bound[1], vor.max_bound[1])

        # Output image saving in any format; default jpg
        bs_out = 'out.jpg' if bs_out is None else bs_out

        # Get image as numpy array
        figure.tight_layout(pad=0)
        img = fig_to_numpy(figure, alpha=alpha)

        if save_fig:
            figure.subplots_adjust(bottom=0, top=1, left=0, right=1)
            # savefig's ``frameon`` keyword was removed from matplotlib; a
            # fully transparent facecolor is the documented equivalent of
            # ``frameon=False``.
            figure.savefig(bs_out, facecolor='none', pad_inches=0)
    finally:
        # Always release the figure, including when a step above raises.
        plt.close(figure)

    return atoms, vor, img