def test_notSupportedExtension(self):
    """Checks that ia.read raises StructureError for an unsupported extension.

    NOTE(review): the path used here ends in '.pdb' and lives in an
    'illusions' directory, so this may really be exercising the
    missing-file path rather than the extension check -- confirm.
    """
    target = os.path.join(TESTDIR, 'illusions', 'void.pdb')
    # Callable form of assertRaises: runs ia.read(target) and expects it
    # to raise.
    self.assertRaises(ia.structure.StructureError, ia.read, target)
def test_wrongFileType(self):
    """Checks that a user-supplied file type that does not match raises.

    The sample 'mini.pdb' file is read with ftype='cif' and the call is
    expected to fail with StructureError.
    """
    pdb_path = os.path.join(TESTDIR, 'data', 'mini.pdb')
    # Callable form of assertRaises: runs ia.read(pdb_path, ftype='cif').
    self.assertRaises(
        ia.structure.StructureError, ia.read, pdb_path, ftype='cif')
def test_missingFile(self):
    """Checks that reading a non-existent file raises StructureError."""
    path_parts = (TESTDIR, 'illusions', 'void.pdb')
    ghost_file = os.path.join(*path_parts)

    expected_error = ia.structure.StructureError
    with self.assertRaises(expected_error):
        ia.read(ghost_file)
def setUp(self):
    """Builds one InteractionAnalyzer per sample complex.

    Sets `self.pp_ia` (from 'protein_protein.cif') and `self.pl_ia`
    (from 'protein_ligand.pdb').
    """
    data_dir = os.path.join(TESTDIR, 'data')

    # Both structures are read before any analyzer is constructed.
    protein_protein, protein_ligand = (
        ia.read(os.path.join(data_dir, fname))
        for fname in ('protein_protein.cif', 'protein_ligand.pdb')
    )

    self.pp_ia = ia.InteractionAnalyzer(protein_protein)
    self.pl_ia = ia.InteractionAnalyzer(protein_ligand)
def test_readCIF(self):
    """Checks atom/residue/chain counts after parsing a sample mmCIF file.

    The expected counts are taken from `self.n_atoms`, `self.n_residues`
    and `self.n_chains`.
    """
    cif_path = os.path.join(TESTDIR, 'data', 'mini.cif')
    topology = ia.read(cif_path).topology

    n_atoms = topology.getNumAtoms()
    self.assertEqual(n_atoms, self.n_atoms)

    n_residues = topology.getNumResidues()
    self.assertEqual(n_residues, self.n_residues)

    n_chains = topology.getNumChains()
    self.assertEqual(n_chains, self.n_chains)
def setUp(self):
    """Reads the 'mini.pdb' sample file into `self.struct`."""
    self.struct = ia.read(os.path.join(TESTDIR, 'data', 'mini.pdb'))
def comb_int(pdbfile, project_id, itype, include_intra=False, **kwargs):
    """Analyzes one interaction type in a PDB file and writes it to CSV.

    The structure is read with interfacea, `get_<itype>` is called on an
    InteractionAnalyzer, and the resulting interaction table is annotated
    with extra columns before being written to
    `<cwd>/<project_id[0]>/02_interfacea_results/<itype>/`
    as `md_<frame>.pdb_<itype>_all.csv`.

    Arguments
    ---------
        pdbfile : pathlib.Path or str
            Input structure file. The file stem must contain 'md_'; the
            text after it is used as the frame label.
        project_id : sequence
            Only `project_id[0]` is used, as the top-level output folder
            name. NOTE(review): a plain string here would contribute only
            its first character -- confirm callers pass a sequence.
        itype : str
            Interaction type; selects the analyzer method `get_<itype>`.
        include_intra : bool
            Forwarded to the analyzer method as `include_intra`.
        **kwargs
            Extra keyword arguments forwarded to the analyzer method.

    Returns
    -------
        None. When the table is empty nothing is written.

    Raises
    ------
        ValueError
            If the analyzer has no `get_<itype>` method.
        IndexError
            If the file stem does not contain 'md_'.
    """
    # Accept plain strings as well as Path objects; `.stem` is needed below.
    pdbfile = pathlib.Path(pdbfile)

    curdir = pathlib.Path('.').resolve(strict=True)
    output_dir = (
        curdir / str(project_id[0]) / '02_interfacea_results' / f'{itype}'
    )
    output_dir.mkdir(parents=True, exist_ok=True)

    # Setup interfacea
    ia.set_log_level('minimal')
    mol = ia.read(str(pdbfile))  # interfacea is handed a str, not a Path.
    analyzer = ia.InteractionAnalyzer(mol)

    func = getattr(analyzer, f'get_{itype}', None)
    if func is None:
        raise ValueError(f'Unknown analysis function: get_{itype}')

    kwargs['include_intra'] = include_intra
    func(**kwargs)

    df_table = analyzer.itable._table

    # Bail out before renaming columns so an empty table can never trip a
    # column-count mismatch.
    if len(df_table) == 0:
        logging.info('No interactions of type "%s" found in input file', itype)
        return

    df_table.columns = [
        'itype', 'donor_chain', 'acceptor_chain', 'donor_resnm',
        'acceptor_resnm', 'donor_resid', 'acceptor_resid', 'donor_atom',
        'acceptor_atom'
    ]

    # Renumber the index up front so every derived column lines up by
    # position, whatever index the analyzer produced.
    df_table.index = range(len(df_table))

    # Residue labels (e.g. 'ALA12') and chain-qualified labels ('ALA12_A').
    for role in ('donor', 'acceptor'):
        residue = (df_table[f'{role}_resnm'].astype(str)
                   + df_table[f'{role}_resid'].astype(str))
        df_table[role] = residue
        df_table[f'{role}C'] = (
            residue + '_' + df_table[f'{role}_chain'].astype(str))

    df_table['donor_acceptor'] = (
        df_table['donorC'] + ':' + df_table['acceptorC'])

    df_table['chain_type'] = [
        'intra' if acc_chain == don_chain else 'inter'
        for acc_chain, don_chain in zip(df_table['acceptor_chain'],
                                        df_table['donor_chain'])
    ]

    # You could probably move this to a data.py module.
    protein_residues = {
        'ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR',
        'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL'
    }
    dna_residues = {'DA', 'DC', 'DG', 'DT'}

    # Map each residue name to its molecule kind, then each (donor, acceptor)
    # kind pair to a label; anything unrecognised falls back to 'other'.
    residue_kind = {name: 'protein' for name in protein_residues}
    residue_kind.update((name, 'dna') for name in dna_residues)
    pair_label = {
        ('dna', 'dna'): 'dna-dna',
        ('protein', 'protein'): 'protein-protein',
        ('dna', 'protein'): 'protein-dna',
        ('protein', 'dna'): 'protein-dna',
    }
    df_table['prot_or_dna'] = [
        pair_label.get((residue_kind.get(donor), residue_kind.get(acceptor)),
                       'other')
        for donor, acceptor in zip(df_table['donor_resnm'],
                                   df_table['acceptor_resnm'])
    ]

    if itype == 'hbonds':
        non_spp_atoms = {
            "O2P", "O1P", "N", "O", "OC1", "OC2", "O4'", "O5'", "O3'", "H",
            "HA"
        }
        # A bond is non-specific when EITHER partner atom is in the set;
        # both the acceptor and the donor atom are checked.
        df_table['specificity'] = [
            'non-specific'
            if (acc_atom in non_spp_atoms or don_atom in non_spp_atoms)
            else 'specific'
            for acc_atom, don_atom in zip(df_table['acceptor_atom'],
                                          df_table['donor_atom'])
        ]

    # Frame label is whatever follows 'md_' in the file stem.
    frame_no = pdbfile.stem.split('md_')[1]
    df_table['time'] = frame_no

    logging.info('Writing "%s" bonds to disk...', itype)
    df_table.to_csv(str(output_dir / f'md_{frame_no}.pdb_{itype}_all.csv'),
                    index=False)
def comb_int(pdb: str, complexName, intType, includeIntra, hbond_distance):
    """Analyzes one interaction type in a PDB file and writes it to CSV.

    The structure is read with interfacea, `get_<intType>` is called on an
    InteractionAnalyzer, and the resulting interaction table is annotated
    with extra columns before being written to
    `<cwd>/<complexName>/03_interfacea_results/<intType>/`
    as `md_<frame>.pdb_<intType>_all.csv`.

    Arguments
    ---------
        pdb : str
            Path of the input structure file. The file name must contain
            'md_'; the text between it and the next '.' is the frame label.
        complexName
            Name of the top-level output folder (formatted into the path).
        intType
            Interaction type; `str(intType)` selects the analyzer method.
        includeIntra
            Passed to the analyzer as `include_intra` unless its string
            form is "False", in which case the argument is omitted.
        hbond_distance
            Converted to float and passed as `max_distance`; only used when
            the interaction type is "hbonds".

    Returns
    -------
        None. When the table is empty nothing is written.

    Raises
    ------
        AttributeError
            If the analyzer has no `get_<intType>` method.
        IndexError
            If the file name does not contain 'md_'.
    """
    itype = str(intType)

    pwrite = f'{os.getcwd()}/{complexName}/03_interfacea_results/{intType}'
    os.makedirs(pwrite, exist_ok=True)

    ia.set_log_level('minimal')
    mol = ia.read(pdb)
    analyzer = ia.InteractionAnalyzer(mol)

    # Only pass the options that apply: include_intra is omitted when it is
    # (the string form of) False, max_distance is specific to hbonds.
    call_kwargs = {}
    if str(includeIntra) != "False":
        call_kwargs['include_intra'] = includeIntra
    if itype == "hbonds":
        call_kwargs['max_distance'] = float(hbond_distance)
    getattr(analyzer, "get_" + itype)(**call_kwargs)

    df_table = analyzer.itable._table

    # Handle "nothing found" explicitly rather than relying on a ValueError
    # raised somewhere further down.
    if len(df_table) == 0:
        logging.info('Found 0 %s interactions !!', intType)
        return

    df_table.columns = [
        'itype', 'donor_chain', 'acceptor_chain', 'donor_resnm',
        'acceptor_resnm', 'donor_resid', 'acceptor_resid', 'donor_atom',
        'acceptor_atom'
    ]

    # Renumber the index up front so every derived column lines up by
    # position, whatever index the analyzer produced.
    df_table.index = range(len(df_table))

    # Residue labels (e.g. 'ALA12') and chain-qualified labels ('ALA12_A').
    for role in ('donor', 'acceptor'):
        residue = (df_table[f'{role}_resnm'].astype(str)
                   + df_table[f'{role}_resid'].astype(str))
        df_table[role] = residue
        df_table[f'{role}C'] = (
            residue + '_' + df_table[f'{role}_chain'].astype(str))

    df_table['donor_acceptor'] = (
        df_table['donorC'] + ':' + df_table['acceptorC'])

    df_table['chain_type'] = [
        'intra' if acc_chain == don_chain else 'inter'
        for acc_chain, don_chain in zip(df_table['acceptor_chain'],
                                        df_table['donor_chain'])
    ]

    proteinResidues = {
        'ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR',
        'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL'
    }
    dnaResidues = {'DA', 'DC', 'DG', 'DT'}

    # Map each residue name to its molecule kind, then each (donor, acceptor)
    # kind pair to a label. Rows involving any other residue are labelled
    # 'other' so the column always has exactly one entry per row.
    residue_kind = {name: 'protein' for name in proteinResidues}
    residue_kind.update((name, 'dna') for name in dnaResidues)
    pair_label = {
        ('dna', 'dna'): 'dna-dna',
        ('protein', 'protein'): 'protein-protein',
        ('dna', 'protein'): 'protein-dna',
        ('protein', 'dna'): 'protein-dna',
    }
    df_table['prot_or_dna'] = [
        pair_label.get((residue_kind.get(donor), residue_kind.get(acceptor)),
                       'other')
        for donor, acceptor in zip(df_table['donor_resnm'],
                                   df_table['acceptor_resnm'])
    ]

    if itype == "hbonds":
        non_spp_atoms = {
            "O2P", "O1P", "N", "O", "OC1", "OC2", "O4'", "O5'", "O3'", "H",
            "HA"
        }
        # A bond is non-specific when EITHER partner atom is in the set;
        # each atom is tested on its own (`(a or b) in s` would only ever
        # test the acceptor).
        df_table['specificity'] = [
            'non-specific'
            if (acc_atom in non_spp_atoms or don_atom in non_spp_atoms)
            else 'specific'
            for acc_atom, don_atom in zip(df_table['acceptor_atom'],
                                          df_table['donor_atom'])
        ]

    # Parse the frame label from the file name only, so an 'md_' in a parent
    # directory name cannot be picked up by mistake.
    frame_no = os.path.basename(pdb).split("md_")[1].split(".")[0]
    df_table['time'] = frame_no

    logging.info("Writing %s bonds to files...", intType)
    df_table.to_csv(f'{pwrite}/md_{frame_no}.pdb_{intType}_all.csv',
                    index=False)
def setUp(self):
    """Reads the 'protein_ligand.pdb' sample file into `self.struct`."""
    data_dir = os.path.join(TESTDIR, 'data')
    self.struct = ia.read(os.path.join(data_dir, 'protein_ligand.pdb'))
def setUp(self):
    """Stores the sample PDB/mmCIF paths and reads the PDB structure.

    Sets `self.pdb`, `self.cif` (file paths) and `self.struct` (the
    structure read from `self.pdb`).
    """
    data_dir = os.path.join(TESTDIR, 'data')
    self.pdb, self.cif = (
        os.path.join(data_dir, fname) for fname in ('mini.pdb', 'mini.cif')
    )
    self.struct = ia.read(self.pdb)
def setUp(self):
    """Reads the 'mini_noH.pdb' sample file into `self.struct`."""
    noh_path = os.path.join(TESTDIR, 'data', 'mini_noH.pdb')
    self.struct = ia.read(noh_path)
def setUp(self):
    """Reads the complete and the incomplete sample structures.

    Sets `self.s_allatom` (from 'mini.pdb') and `self.s_missing`
    (from 'mini_incomplete.pdb').
    """
    allatom_path = os.path.join(TESTDIR, 'data', 'mini.pdb')
    self.s_allatom = ia.read(allatom_path)

    incomplete_path = os.path.join(TESTDIR, 'data', 'mini_incomplete.pdb')
    self.s_missing = ia.read(incomplete_path)