Ejemplo n.º 1
0
def test_read_write_data_file(tmp_path):
    """Round-trip test: load a CIF data file and dump it back out.

    :param tmp_path:  pytest fixture providing a temporary directory
    """
    input_path = Path(DATA_DIR) / "1kip.cif"
    with open(input_path, "rt") as input_file:
        data_list = pdbx.load(input_file)
    output_path = Path(tmp_path) / "testOutputDataFile.cif"
    # Bug fix: the original bound the opened file object to the name
    # ``output_path``, shadowing the Path it had just built.  Use a
    # distinct name for the file handle.
    with open(output_path, "wt") as output_file:
        pdbx.dump(data_list, output_file)
Ejemplo n.º 2
0
    def spatial_feature_to_DB(self, pdbID: str, chain: str):
        """Extract per-residue spatial features from a PDB mmCIF file and
        store pairwise residue distance/angle rows in a per-chain DB table.

        For every residue of the requested chain (model 1 only) this
        computes:

        * ``center`` -- centroid of the selected side-chain atoms
          (backbone C/N/O for glycine, which has no side chain),
        * ``direction`` -- CA position minus that centroid,
        * ``caPosition`` -- coordinates of the CA atom,

        then inserts one table row per ordered residue pair: the distance
        between the two centers, and the angle between res1's
        ``direction`` and the vector from res1's CA to res2's center.

        :param pdbID:  PDB identifier; ``<workingRoot><pdbID>.cif`` is
            downloaded via ``PDB_downloader`` if needed
        :param chain:  author chain identifier (``auth_asym_id``)
        :return:  None; results are written to table ``<pdbID>_<chain>``
        """
        tableName = '_'.join([pdbID, chain])
        # Skip work if the table was already populated by an earlier run.
        if self.sql_table_exist(tableName):
            print(f"{tableName} is already in the DB")
            return
        if not self.PDB_downloader(pdbID):
            return
        with open(self.workingRoot + pdbID + '.cif', 'r') as f:
            cifData = pdbx.load(f)[0]
        # Build a DataFrame from the atom_site category.  NOTE(review):
        # this reaches into the pdbx container's private ``_row_list`` /
        # ``_attribute_name_list`` attributes -- fragile across pdbx
        # versions; check whether a public accessor exists.
        atomList = pd.DataFrame(
            cifData.get_object('atom_site')._row_list,
            columns=cifData.get_object('atom_site')._attribute_name_list)
        ############
        # Resolution not found in mmcif
        # Leave space here
        #####
        atomList = atomList[atomList['auth_asym_id'] == chain]
        if atomList.shape[0] < 1:
            print(f'chain {chain} not found in {pdbID} cif file')
            return
        # Pre-allocated structured array for per-residue features.
        # NOTE(review): capacity is fixed at 2000 residues -- a longer
        # chain would raise IndexError; confirm against expected inputs.
        res_info_chain = np.empty(2000,
                                  dtype=[
                                      ('pdbID', 'S5'), ('chain', 'S2'),
                                      ('type', 'S4'), ('pdbNum', 'int16'),
                                      ('authNum', 'int16'),
                                      ('center', 'float16,float16,float16'),
                                      ('direction', 'float16,float16,float16'),
                                      ('caPosition', 'float16,float16,float16')
                                  ])
        allPDBNum = atomList['label_seq_id'].unique()
        i = 0  # count of residues successfully processed / rows filled
        for PDBNum in allPDBNum:
            # Keep only model 1.  NOTE(review): this filter is
            # loop-invariant (re-applying it is idempotent) and could be
            # hoisted above the loop; note it is applied *after*
            # ``allPDBNum`` was collected from the unfiltered frame.
            atomList = atomList[atomList["pdbx_PDB_model_num"] == '1']
            oneRes = atomList[atomList["label_seq_id"] == PDBNum]
            # Skip residues whose component id is not a standard amino acid.
            if oneRes['label_comp_id'].iloc[0] not in aminoAcidCodes:
                continue
            if oneRes['label_comp_id'].iloc[0] != 'GLY':
                try:  # in case some components missing
                    # Centroid (mean x/y/z) of the atoms selected by
                    # ``backBone``.  Presumably ``backBone`` names the
                    # side-chain atom set here -- confirm its contents.
                    rGroup = oneRes[oneRes["label_atom_id"].isin(backBone)]
                    rGroupX = sum(pd.to_numeric(
                        rGroup['Cartn_x'])) / rGroup.shape[0]
                    rGroupY = sum(pd.to_numeric(
                        rGroup['Cartn_y'])) / rGroup.shape[0]
                    rGroupZ = sum(pd.to_numeric(
                        rGroup['Cartn_z'])) / rGroup.shape[0]
                    # CA position minus the centroid (np.subtract(a, b)
                    # is a - b).
                    cToRGroup = np.subtract((float(
                        oneRes[oneRes['label_atom_id'] == 'CA']['Cartn_x']
                    ), float(
                        oneRes[oneRes['label_atom_id'] == 'CA']['Cartn_y']
                    ), float(
                        oneRes[oneRes['label_atom_id'] == 'CA']['Cartn_z'])),
                                            (rGroupX, rGroupY, rGroupZ))

                    res_info_chain[i] = (pdbID, chain,
                                         oneRes['label_comp_id'].iloc[0],
                                         PDBNum, oneRes["auth_seq_id"].iloc[0],
                                         (rGroupX, rGroupY, rGroupZ),
                                         tuple(cToRGroup),
                                         (float(oneRes[oneRes['label_atom_id']
                                                       == 'CA']['Cartn_x']),
                                          float(oneRes[oneRes['label_atom_id']
                                                       == 'CA']['Cartn_y']),
                                          float(oneRes[oneRes['label_atom_id']
                                                       == 'CA']['Cartn_z'])))
                    i += 1
                except Exception as e:
                    # Best effort: a residue with missing atoms is skipped.
                    print(e)
            else:
                # Glycine has no side chain: use backbone C, N, O instead.
                try:
                    rGroupX = float(oneRes[oneRes['label_atom_id'] == 'C']['Cartn_x']) + \
                              float(oneRes[oneRes['label_atom_id'] == 'N']['Cartn_x']) + \
                              float(oneRes[oneRes['label_atom_id'] == 'O']['Cartn_x'])
                    rGroupY = float(oneRes[oneRes['label_atom_id'] == 'C']['Cartn_y']) + \
                              float(oneRes[oneRes['label_atom_id'] == 'N']['Cartn_y']) + \
                              float(oneRes[oneRes['label_atom_id'] == 'O']['Cartn_y'])
                    rGroupZ = float(oneRes[oneRes['label_atom_id'] == 'C']['Cartn_z']) + \
                              float(oneRes[oneRes['label_atom_id'] == 'N']['Cartn_z']) + \
                              float(oneRes[oneRes['label_atom_id'] == 'O']['Cartn_z'])
                    # NOTE(review): rGroupX/Y/Z here are *sums* of three
                    # coordinates (divided by 3 only when stored as
                    # ``center`` below), so this vector subtracts the
                    # un-averaged sum -- inconsistent with the non-GLY
                    # branch, which subtracts the averaged centroid.
                    # Looks like a bug; verify before relying on
                    # ``direction`` for glycine rows.
                    cToRGroup = np.subtract((float(
                        oneRes[oneRes['label_atom_id'] == 'CA']['Cartn_x']
                    ), float(
                        oneRes[oneRes['label_atom_id'] == 'CA']['Cartn_y']
                    ), float(
                        oneRes[oneRes['label_atom_id'] == 'CA']['Cartn_z'])),
                                            (rGroupX, rGroupY, rGroupZ))
                    res_info_chain[i] = (pdbID, chain,
                                         oneRes['label_comp_id'].iloc[0],
                                         PDBNum, oneRes["auth_seq_id"].iloc[0],
                                         (rGroupX / 3, rGroupY / 3,
                                          rGroupZ / 3), tuple(cToRGroup),
                                         (float(oneRes[oneRes['label_atom_id']
                                                       == 'CA']['Cartn_x']),
                                          float(oneRes[oneRes['label_atom_id']
                                                       == 'CA']['Cartn_y']),
                                          float(oneRes[oneRes['label_atom_id']
                                                       == 'CA']['Cartn_z'])))
                    i += 1
                except Exception as e:
                    print(e)
        # Trim the pre-allocated array to the residues actually filled.
        res_info_chain = res_info_chain[0:i]
        self.sql_create_table(tableName)
        conn = self.sql_connection()
        cursor = conn.cursor()
        try:
            # All ordered pairs, including self-pairs; a self-pair falls
            # through the distance guard below and is stored with angle 0.
            for res1 in range(i):
                for res2 in range(i):
                    corrdinatesSubstract = np.subtract(
                        tuple(res_info_chain[res1]["center"]),
                        tuple(res_info_chain[res2]["center"]))
                    distance = np.sqrt(np.sum(corrdinatesSubstract**2))
                    vector1 = np.array(tuple(
                        res_info_chain[res1]["direction"]))
                    # Vector from res1's CA position to res2's center.
                    vector2 = np.subtract(
                        tuple(res_info_chain[res2]['center']),
                        tuple(res_info_chain[res1]['caPosition']))
                    if abs(distance
                           ) > 0.0001:  # Exclude 0 distance, e.g. self to self
                        # Angle in degrees.  The 0.99 factor presumably
                        # keeps the cosine inside arccos's [-1, 1] domain
                        # despite float16 rounding -- note it also biases
                        # every angle slightly; confirm this is intended.
                        angle = 180 * np.arccos(
                            0.99 * np.dot(vector1, vector2) /
                            (np.linalg.norm(vector1) *
                             np.linalg.norm(vector2))) / np.pi
                    else:
                        angle = 0
                    # NOTE(review): SQL built by f-string interpolation.
                    # Values come from parsed CIF data rather than user
                    # input, but parameterized queries would still be
                    # safer for the value fields.
                    sqlCommand = f''' INSERT INTO `{tableName}`
                            (pdbID , chain,  pdbNum1 ,authNum1,resType1 ,pdbNum2 ,authNum2 ,resType2 ,distance,angle)
                    VALUES ('{pdbID}','{chain}','{res_info_chain[res1]['pdbNum']}','{res_info_chain[res1]['authNum']}',
                             '{res_info_chain[res1]['type'].decode('UTF-8')}','{res_info_chain[res2]['pdbNum']}',
                            '{res_info_chain[res2]['authNum']}',
                            '{res_info_chain[res2]['type'].decode('UTF-8')}',{distance},{angle})
                                   '''
                    cursor.execute(sqlCommand)
        except Exception as e:
            # A failed insert aborts the remaining pairs, but whatever was
            # inserted before the failure is still committed below.
            print(e)

        conn.commit()
        cursor.close()
        conn.close()
        print(f'{pdbID} {chain} done')
        return
Ejemplo n.º 3
0
def read_cif(cif_file):
    """Parse CIF-format data into array of Atom objects.

    Records from every data block in the file are accumulated.  (The
    previous implementation reassigned the result lists on each block,
    so only the last block's records survived, despite the todo to
    manage several blocks.)

    :param cif_file:  open file-like object
    :type cif_file:  file
    :return:  (a list of parsed PDBx/CIF records, a list of record
        names that couldn't be parsed)
    :rtype:  (list, [str])
    """
    pdblist = []  # Array of parsed lines (as objects)
    errlist = []  # List of record names that couldn't be parsed.
    if cif_file is None:
        return pdblist, errlist
    pdbdata = pdbx.load(cif_file)
    if not pdbdata:
        _LOGGER.error("Unknown error while reading CIF file.")
        return pdblist, errlist
    # Each parser returns (records, errors).  Run them in PDB record
    # order and accumulate the results across all data blocks.
    parsers = (
        header,
        title,
        compnd,
        source,
        keywds,
        expdata,
        author,
        ssbond,
        cispep,
        cryst1,
        origxn,
        scalen,
        atom_site,
        conect,
    )
    for block in pdbdata:
        for parse in parsers:
            records, errors = parse(block)
            pdblist += records
            errlist += errors
    return pdblist, errlist
Ejemplo n.º 4
0
    def parse_cif_file(self, cif_file):
        """Parse CIF file into PDB entry.

        Loads the single data container from the file, then runs each
        section parser in turn; a parsed result is stored on the
        corresponding private attribute only when parsing succeeds (or
        returns a non-empty result), so attributes keep their prior
        values for sections absent from the file.

        :param file cif_file:  CIF file to parse (file object ready for
            reading)
        :raises ValueError:  if the file contains more than one data
            container
        """
        containers = pdbx.load(cif_file)
        if len(containers) > 1:
            errstr = f"Found {len(containers)} instead of 1."
            raise ValueError(errstr)
        else:
            container = containers[0]
        # --- Annotation records -----------------------------------------
        header = annotation.Header()
        if header.parse_cif(container):
            self._header = header
        obsolete = annotation.Obsolete()
        if obsolete.parse_cif(container):
            self._obsolete = obsolete
        self.parse_cif_title(container)
        caveat = annotation.Caveat()
        if caveat.parse_cif(container):
            self._caveat = caveat
        compound = annotation.Compound()
        if compound.parse_cif(container):
            self._compound = compound
        source = annotation.Source()
        if source.parse_cif(container):
            self._source = source
        keywords = annotation.Keywords()
        if keywords.parse_cif(container):
            self._keyword = keywords
        exp_data = annotation.ExperimentalData()
        if exp_data.parse_cif(container):
            self._experimental_data = exp_data
        model_type = annotation.ModelType()
        if model_type.parse_cif(container):
            self._model_type = model_type
        authors = annotation.Author()
        if authors.parse_cif(container):
            self._author = authors
        rev_data = annotation.RevisionData()
        if rev_data.parse_cif(container):
            self._revision_data = rev_data
        supersedes = annotation.Supersedes()
        if supersedes.parse_cif(container):
            self._supersedes = supersedes
        journals = annotation.Journal.parse_cif(container)
        self._journal = journals
        _LOGGER.warning("Not parsing REMARK records from CIF.")
        # --- Primary structure records ----------------------------------
        db_refs = primary.DatabaseReference.parse_cif(container)
        self._database_reference = db_refs
        sequence_diffs = primary.SequenceDifferences.parse_cif(container)
        self._sequence_difference = sequence_diffs
        sequence_residues = primary.SequenceResidues()
        if sequence_residues.parse_cif(container):
            self._sequence_residue = sequence_residues
        modified_residues = primary.ModifiedResidue.parse_cif(container)
        if modified_residues:
            self._modified_residue = modified_residues
        # --- Heterogen records ------------------------------------------
        heterogens = heterogen.Heterogen.parse_cif(container)
        if heterogens:
            self._heterogen = heterogens
        het_name = heterogen.HeterogenName()
        if het_name.parse_cif(container):
            self._heterogen_name = het_name
        het_syn = heterogen.HeterogenSynonym()
        if het_syn.parse_cif(container):
            self._heterogen_synonym = het_syn
        _LOGGER.warning("Not parsing FORMULA records from CIF.")
        # --- Secondary structure records --------------------------------
        helices = secondary.Helix.parse_cif(container)
        if helices:
            self._helix = helices
        _LOGGER.warning("Not parsing SHEET records from CIF.")
        disulfides = secondary.DisulfideBond.parse_cif(container)
        if disulfides:
            self._disulfide_bond = disulfides
        links = secondary.Link.parse_cif(container)
        if links:
            self._link = links
        cis_peps = secondary.CisPeptide.parse_cif(container)
        if cis_peps:
            self._cis_peptide = cis_peps
        site = annotation.Site()
        if site.parse_cif(container):
            self._site = site
        # --- Crystallography records ------------------------------------
        unit_cell = crystallography.UnitCell()
        if unit_cell.parse_cif(container):
            self._unit_cell = unit_cell
        transforms = crystallography.OriginalTransform.parse_cif(container)
        if transforms:
            self._orig_transform = transforms
        transforms = crystallography.FractionalTransform.parse_cif(container)
        if transforms:
            self._frac_transform = transforms
        transforms = crystallography.NoncrystalTransform.parse_cif(container)
        if transforms:
            self._noncrystal_transform = transforms
        # --- Coordinate records, keyed by model serial number -----------
        models = coordinates.Model.parse_cif(container)
        if models:
            self._models = models
        atoms = coordinates.Atom.parse_cif(container)
        het_atoms = coordinates.HeterogenAtom.parse_cif(container)
        temp_factors = coordinates.TemperatureFactor.parse_cif(container)
        # NOTE(review): if Model.parse_cif can return None rather than an
        # empty sequence, this loop raises TypeError -- confirm its
        # contract.
        for model in models:
            model_num = model.serial
            try:
                model.records += atoms[model_num]
            except KeyError:
                _LOGGER.debug(f"No ATOM records for model {model_num}.")
            try:
                model.records += het_atoms[model_num]
            except KeyError:
                _LOGGER.debug(f"No HETATM records for model {model_num}.")
            try:
                model.records += temp_factors[model_num]
            except KeyError:
                _LOGGER.debug(f"No ANISOU records for model {model_num}.")
        # NOTE(review): stored as ``_model`` here but ``_models`` above --
        # one of the two attribute names is probably a typo; check which
        # one the rest of the class reads.
        self._model = models