Beispiel #1
0
    def extract_ligand_records(self, pdb_file, relevant_ligands):
        """
        Collect every bound copy of the requested ligands from a PDB file.

        :param pdb_file: path to the PDB file to search
        :param relevant_ligands: mapping keyed by ligand residue name; only its keys are consulted here
        :return: dict mapping (resname, chain, resnum) to the residue returned by the ProDy hierarchical view
        """
        # First pass: header only; the result is used as the header dictionary.
        header = prody.parsePDB(pdb_file, model=0, header=True)

        # Remember where each requested ligand sits according to the header.
        ligand_keys = []
        for chemical in header['chemicals']:
            if chemical.resname in relevant_ligands.keys():
                ligand_keys.append(
                    (chemical.resname, chemical.chain, chemical.resnum))

        # Second pass: full coordinates, indexed by chain and residue number.
        hier_view = prody.parsePDB(pdb_file, altloc=True).getHierView()

        ligand_records = {}
        for key in ligand_keys:
            _, chain_id, resnum = key
            residue = hier_view.getResidue(chain_id, resnum)
            # A missing residue is skipped; the original author attributes
            # these cases to ligands parsed with alternate coordinates.
            if residue is not None:
                ligand_records[key] = residue

        return ligand_records
Beispiel #2
0
def get_alphashape(pdb, chain=None, plot=False):
    '''
    Build the alpha shape of a PDB structure for use in a clash filter.

    When `chain` is given only that chain is parsed; otherwise everything
    except chain A is used. With `plot=True` the point cloud, the chain A
    coordinates and the alpha-shape surface are shown in a 3D figure before
    returning.

    Returns the object produced by `alphashape.alphashape` (alpha = 0.18).
    '''
    if chain:
        atoms = prody.parsePDB(pdb, chain=chain)
    else:
        atoms = prody.parsePDB(pdb).select('not chain A')

    # Pool the points of every coordinate set into one flat list.
    coords = [xyz for coordset in atoms.getCoordsets() for xyz in coordset]

    alpha_shape = alphashape.alphashape(coords, 0.18)
    if not plot:
        return alpha_shape

    # Chain A is drawn alongside the shape as a visual reference.
    helix_coords = prody.parsePDB(pdb, chain='A').getCoordsets()[0]
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(*zip(*coords))
    ax.scatter(*zip(*helix_coords))
    ax.plot_trisurf(*zip(*alpha_shape.vertices),
                    triangles=alpha_shape.faces,
                    alpha=0.3)
    plt.show()
    return alpha_shape
Beispiel #3
0
    def prepare_pdb22(self, out_prefix, csets=None, **kwargs):
        """
        Run the single-frame preparation and rebuild ``self.ag`` from its output.

        The molecule is prepared once via ``_prepare_pdb22_one_frame``. When more
        than one coordinate set is requested, the coordinates are carried over to
        the prepared molecule: directly if atom names match, through
        ``_match_by_residue_position`` if only the atom count matches, or by
        preparing every frame separately if the atom count changed.

        :param out_prefix: prefix handed to the single-frame preparation
        :param csets: coordinate-set selection, normalised by ``_make_csets``
        :param kwargs: forwarded to ``_prepare_pdb22_one_frame``
        :return: ``(self, psf)`` where ``psf`` comes from the first preparation
        """
        csets = self._make_csets(csets)

        nmin, psf = self._prepare_pdb22_one_frame(out_prefix, **kwargs)
        prepared = prody.parsePDB(nmin)

        if len(csets) != 1:
            same_size = prepared.numAtoms() == self.ag.numAtoms()
            if same_size and list(prepared.getNames()) == list(self.ag.getNames()):
                # Identical atom layout: reuse the existing coordinate sets.
                prepared.setCoords(self.ag.getCoordsets())
            elif same_size:
                prepared = self._match_by_residue_position(self.ag, prepared)
            else:
                logger.info('Molecule was altered during preparation, preparing each frame separately')
                frame_coords = []
                for cset in csets:
                    # NOTE(review): every frame is prepared with cset=0 rather
                    # than the loop's cset - confirm this is intended.
                    frame_prefix = out_prefix + '-%i-tmp' % cset
                    nmin_frame, psf_frame = self._prepare_pdb22_one_frame(frame_prefix, cset=0, **kwargs)
                    frame_ag = prody.parsePDB(nmin_frame)
                    assert (list(prepared.getNames()) == list(frame_ag.getNames()))

                    frame_coords.append(frame_ag.getCoords())
                    # Per-frame outputs are temporary.
                    nmin_frame.remove()
                    psf_frame.remove()

                prepared.setCoords(np.array(frame_coords))

        self.ag = prepared
        self.save(nmin)
        return self, psf
Beispiel #4
0
    def get_vdms(self, df, path_to_vdm=None):
        """
        Lazily parse the vdM PDB file referenced by each row of ``df``.

        File names are built as ``iFG_<iFG_count>_vdM_<vdM_count>_<suffix>``,
        where ``<suffix>`` is everything after the fourth underscore in the name
        of the first non-hidden entry found in the vdM directory.

        :param df: DataFrame with ``iFG_count`` and ``vdM_count`` columns
            (``query_name`` is selected too when present)
        :param path_to_vdm: directory prefix of the vdM files, including the
            trailing separator; defaults to the part of ``self._directory``
            before ``'csv'`` plus ``'vdM/'``
        :return: generator yielding the result of ``pr.parsePDB`` for every row
            whose file could be parsed; failures print a traceback and are skipped
        """
        path = path_to_vdm or self._directory.split('csv')[0] + 'vdM/'

        # Derive the shared file-name suffix from the first visible entry.
        filename_end = None
        with os.scandir(path) as it:
            for entry in it:
                if entry.name[0] != '.':
                    filename_end = '_'.join(entry.name.split('_')[4:])
                    break

        if filename_end is None:
            # Without a template file no name can be built; previously this
            # surfaced as one swallowed UnboundLocalError traceback per row.
            print('No vdM files found in ' + path)
            return

        # Keep the column selection of the original branches: it affects how
        # iterrows() upcasts the numeric values that end up in the file name.
        columns = ['iFG_count', 'vdM_count']
        if 'query_name' in df.columns:
            columns.append('query_name')

        for _, row in df[columns].iterrows():
            try:
                yield pr.parsePDB(path + 'iFG_' + str(row['iFG_count']) +
                                  '_vdM_' + str(row['vdM_count']) + '_' +
                                  filename_end)
            except Exception:
                traceback.print_exc()
Beispiel #5
0
def native_contact(rec_path, reorder_path, dock_path):
    """
    Fraction of crystal ligand-receptor contacts reproduced by each docked pose.

    Hydrogens are ignored. A contact is an atom pair closer than the
    module-level ``distance_threshold``.

    args:
        rec_path:: str
            path of the receptor structure
        reorder_path:: str
            path of the crystal (reordered) ligand
        dock_path:: str
            path of the docking result; every coordinate set is scored

    returns:
        a one-element list holding the list of per-pose contact ratios
    """

    def heavy_atoms(path):
        return prody.parsePDB(path).select('not hydrogen')

    docked = heavy_atoms(dock_path)
    crystal = heavy_atoms(reorder_path)
    receptor = heavy_atoms(rec_path)

    # Poses and crystal ligand must describe the same atoms.
    assert crystal.numAtoms() == docked.numAtoms()

    poses_xyz = docked.getCoordsets()
    crystal_xyz = crystal.getCoords()
    receptor_xyz = receptor.getCoords()

    def distances_to_receptor(ligand_xyz):
        # Insert an axis so every ligand atom broadcasts against every
        # receptor atom.
        delta = np.expand_dims(ligand_xyz, -2) - receptor_xyz
        return np.sqrt(np.sum(np.square(delta), axis=-1))

    crystal_mask = (distances_to_receptor(crystal_xyz) <
                    distance_threshold).astype(int)
    pose_mask = (distances_to_receptor(poses_xyz) <
                 distance_threshold).astype(int)

    native_total = np.sum(crystal_mask).astype(float)
    shared = np.sum(crystal_mask * pose_mask, axis=(-1, -2))

    return [list(shared / native_total)]
Beispiel #6
0
    def __init__(self, pdb_file=None, pdb_list=None, ag=None):
        """
        Build the wrapper from exactly one of three sources, checked in order.

        :param pdb_file: a ``prody.Atomic`` instance (stored as is) or a path
            string that is parsed with ``prody.parsePDB``
        :param pdb_list: sequence of PDB paths; each file contributes one
            coordinate set and all files must share the first file's atom names
        :param ag: an existing atom group, stored as a copy
        :raises ValueError: if ``pdb_file`` has an unsupported type or no
            source is given
        """
        if pdb_file is not None:
            if isinstance(pdb_file, prody.Atomic):
                self.ag = pdb_file
            elif isinstance(pdb_file, str):
                self.ag = prody.parsePDB(pdb_file)
            else:
                raise ValueError('Wrong type of parameter `pdb_file` ({})'.format(type(pdb_file)))

        elif pdb_list is not None:
            # The first file is the template; it is parsed only once and
            # contributes the first coordinate set.
            ag_first = prody.parsePDB(pdb_list[0])
            reference_names = list(ag_first.getNames())
            new_csets = [ag_first.getCoords()]
            for f in pdb_list[1:]:
                frame = prody.parsePDB(f)
                assert list(frame.getNames()) == reference_names
                new_csets.append(frame.getCoords())
            ag_first.setCoords(np.array(new_csets))
            self.ag = ag_first

        elif ag is not None:
            self.ag = ag.copy()

        else:
            raise ValueError('No molecules specified')
Beispiel #7
0
def overlap(reorder_path, dock_path):
    """
    Fraction of docked atoms that clash with the crystal ligand, per pose.

    A docked atom clashes when at least one crystal atom is closer than the
    module-level ``clash_cutoff_A``.

    args:
        reorder_path:: str
            path of the crystal (reordered) ligand

        dock_path:: str
            path of the docking result; every coordinate set is scored

    returns:
        a one-element list holding the list of per-pose clash ratios
    """
    poses_xyz = prody.parsePDB(dock_path).getCoordsets()
    reference_xyz = prody.parsePDB(reorder_path).getCoords()

    # Pair every docked atom with every crystal atom via broadcasting.
    delta = np.expand_dims(poses_xyz, -2) - reference_xyz
    pair_distance = np.sqrt(np.sum(np.power(delta, 2), axis=-1))

    too_close = (pair_distance < clash_cutoff_A).astype(float)
    # An atom counts once, however many crystal atoms it clashes with.
    atom_has_clash = (np.sum(too_close, axis=-1) > 0).astype(float)
    clash_ratio = np.mean(atom_has_clash, axis=-1)

    return [list(clash_ratio)]
Beispiel #8
0
def assign_pcs(args):
    """
    Project a structure file or trajectory onto selected principal components.

    ``args`` is a 6-tuple ``(fn, topf, eda, pcs, sel, outf)``:
    ``fn`` is the input file, ``topf`` the topology PDB used for ``.dcd``
    input, ``eda[pcs]`` the modes to project onto, ``sel`` the atom selection
    and ``outf`` an optional text file for the projections.

    Returns the projections for ``.dcd`` input and ``None`` otherwise.
    """
    fn, topf, eda, pcs, sel, outf = args

    if fn.endswith("pdb"):
        atoms = prody.parsePDB(fn).select(sel).copy()

        ensemble = prody.Ensemble('A single pdb file ensemble')
        ensemble.setCoords(atoms.getCoords())
        ensemble.addCoordset(atoms.getCoordsets())
        ensemble.iterpose()

        # NOTE(review): this branch only prints the projections and returns
        # None; `outf` is ignored - confirm that is intended.
        print(prody.calcProjection(ensemble, eda[pcs]))
        return

    # NOTE(review): the check above has no leading dot, this one does.
    if not fn.endswith(".dcd"):
        print("Unsupport file type: %s" % fn)
        return None

    structure = prody.parsePDB(topf)
    selected = structure.select(sel)

    trajectory = prody.Trajectory(fn)
    trajectory.link(structure)
    trajectory.setCoords(structure)
    trajectory.setAtoms(selected)

    projections = prody.calcProjection(trajectory, eda[pcs])
    if outf is not None:
        column_names = ["PC%d" % (i + 1) for i in pcs]
        np.savetxt(outf,
                   projections,
                   fmt="%.4f",
                   header=" ".join(column_names),
                   comments="")
    return projections
Beispiel #9
0
def createPCAMOdes(base_path, protein_list,
                   concoord_bin="/home/glenn/Documents/Masterarbeit/concoord/bin"):
    """
    Generate CONCOORD ensembles and PCA mode files for unbound receptor/ligand pairs.

    For every protein ``P`` in ``protein_list`` the structures
    ``<base_path>/P/PA-unbound.pdb`` (receptor) and ``<base_path>/P/PB-unbound.pdb``
    (ligand) are processed with the ``dist`` and ``disco`` executables inside
    ``<base_path>/P/input/pca/concoord/{receptor,ligand}``. Afterwards a PCA of
    each ``*-disco.pdb`` ensemble is written to ``PA-nmd`` / ``PB-nmd``.

    :param base_path: directory holding one sub-directory per protein; it is
        made absolute up front because the working directory changes while
        the tools run
    :param protein_list: iterable of protein identifiers
    :param concoord_bin: directory containing the ``dist`` and ``disco``
        executables (defaults to the previously hard-coded location)

    As before, the process working directory is left in the last folder used.
    PCA failures are reported and skipped instead of being silently ignored.
    """
    # Relative paths would stop resolving after the first os.chdir below.
    base_path = os.path.abspath(base_path)
    dist_exe = os.path.join(concoord_bin, "dist")
    disco_exe = os.path.join(concoord_bin, "disco")

    for protein in protein_list:
        protein_dir = os.path.join(base_path, protein)

        # (unbound structure, working folder, disco ensemble, NMD output)
        partners = []
        for tag, role in (("A", "receptor"), ("B", "ligand")):
            work_dir = os.path.join(protein_dir, "input", "pca", "concoord", role)
            partners.append((
                os.path.join(protein_dir, "{}{}-unbound.pdb".format(protein, tag)),
                work_dir,
                os.path.join(work_dir, "{}{}-disco.pdb".format(protein, tag)),
                os.path.join(work_dir, "{}{}-nmd".format(protein, tag)),
            ))

        for unbound_pdb, work_dir, disco_pdb, _ in partners:
            if not os.path.isdir(work_dir):
                os.makedirs(work_dir)

            # Both tools are run from inside the working folder.
            os.chdir(work_dir)
            dist_proc = Popen([dist_exe, "-p", unbound_pdb], stdin=PIPE)
            # Presumably answers dist's two interactive prompts with "1".
            dist_proc.communicate(input=b'1\n1\n')

            # Argument list instead of a shell string, so paths containing
            # spaces or shell metacharacters are passed through intact.
            Popen([
                disco_exe, "-on", disco_pdb, "-n", "200", "-i", "1000",
                "-viol", "1.", "-bump"
            ]).wait()

        for unbound_pdb, _, disco_pdb, nmd_file in partners:
            try:
                pca = calcPCA(disco_pdb)
                ca_atoms = dy.parsePDB(unbound_pdb, subset='ca')
                dy.writeNMD(nmd_file, pca, ca_atoms)
            except Exception as err:
                # Best effort: keep going with the remaining structures.
                print("PCA mode generation failed for {}: {}".format(
                    disco_pdb, err))
Beispiel #10
0
def rmsd(reorder_path, dock_path):
    """
    RMSD of every docked pose against the crystal ligand.

    :param reorder_path: path of the crystal (reordered) ligand PDB
    :param dock_path: path of the docking result; each coordinate set is one pose
    :return: a one-element list holding the list of per-pose RMSD values

    Atoms are compared position by position, so both files must list the same
    atoms in the same order.
    """
    docked_coords = prody.parsePDB(dock_path).getCoordsets()
    crystal_coords = prody.parsePDB(reorder_path).getCoords()

    # Squared displacement of each atom: sum over x, y, z (last axis).
    # Assumes getCoordsets() is laid out as (poses, atoms, 3).
    sq_displacement = np.sum(np.square(docked_coords - crystal_coords),
                             axis=-1)
    # Mean over the atoms of a pose, then the square root.
    rmsd_per_pose = np.sqrt(np.mean(sq_displacement, axis=-1))

    return [list(rmsd_per_pose)]
Beispiel #11
0
 def Resid(self, pdbFile, pdbChain1, pdbChain2, ligandName, runfolder):
     """
     Find the residues of the `pdbChain1` selection within 5 angstroms of a partner.

     The partner is chain `pdbChain2` when that is non-empty; otherwise it is
     the ligand read from `<ligandName>.acpype/<ligandName>_NEW.pdb`. The
     sorted, unique residue numbers are also written space-separated to
     `cutoff-resid-5angstroms` in `runfolder`, which becomes the working
     directory.

     Returns `(residList, path)` with `residList` a numpy array of residue
     numbers as strings (empty when nothing is within range), or None when
     both `pdbChain2` and `ligandName` are empty.
     """
     # Single-argument print() behaves the same on Python 2 and 3.
     print(self.runfolder)
     os.chdir(runfolder)

     if pdbChain2 != '':
         structure = prody.parsePDB(str(pdbFile))
         partner = 'chain ' + pdbChain2
     elif ligandName != '':
         receptor = prody.parsePDB(str(pdbFile))
         ligand = prody.parsePDB(
             str(ligandName) + '.acpype/' + str(ligandName) + '_NEW.pdb')
         # Merge receptor and ligand so one selection can see both.
         structure = receptor + ligand
         resnames = sorted(set(ligand.getResnames()))
         # Several names go after one `resname` keyword; chaining them with
         # `or` would escape the preceding `and` in the selection below.
         partner = 'resname ' + ' '.join(resnames)
     else:
         return None

     nearby = structure.select('(' + pdbChain1 + ')' + ' and within 5 of ' +
                               partner)
     # select() gives None when no atom matches; report that as an empty list.
     resnums = sorted(set(nearby.getResnums())) if nearby is not None else []
     residList = np.array(resnums, dtype='str')

     with open('cutoff-resid-5angstroms', 'w') as residfile:
         residfile.write(" ".join(residList))
     return residList, str(runfolder + '/cutoff-resid-5angstroms')
Beispiel #12
0
    def write_superposed_pdbs(self, output_pdb_folder, alignments: dict = None):
        """
        Superpose every structure onto the first one and write the results as PDB files.

        Only alignment columns without a gap (``-1``) in any structure are used
        to compute the superposition. The reference structure is translated so
        that the centroid of these core positions sits at the origin.

        Parameters
        ----------
        output_pdb_folder
            destination folder, created if it does not exist
        alignments
            mapping of structure name to aligned indices; defaults to
            ``self.alignment``
        """
        if alignments is None:
            alignments = self.alignment
        output_pdb_folder = Path(output_pdb_folder)
        if not output_pdb_folder.exists():
            output_pdb_folder.mkdir()

        def load_cleaned(structure_name):
            return pd.parsePDB(
                str(self.output_folder / f"cleaned_pdb/{structure_name}.pdb"))

        reference_name = self.structures[0].name
        reference_pdb = load_cleaned(reference_name)

        # Alignment columns in which no structure has a gap.
        gap_free_columns = []
        for column in range(len(alignments[reference_name])):
            if all(alignments[key][column] != -1 for key in alignments):
                gap_free_columns.append(column)
        core_indices = np.array(gap_free_columns)

        def core_alpha_coords(atoms, aligned_positions):
            alpha_coords = (atoms[helper.get_alpha_indices(atoms)]
                            .getCoords()
                            .astype(np.float64))
            picked = np.array([aligned_positions[c] for c in core_indices])
            return alpha_coords[picked]

        ref_coords_core = core_alpha_coords(reference_pdb,
                                            alignments[reference_name])
        ref_centroid = helper.nb_mean_axis_0(ref_coords_core)
        ref_coords_core -= ref_centroid

        # The reference is only shifted, never rotated.
        centering = pd.Transformation(np.eye(3), -ref_centroid)
        reference_pdb = pd.applyTransformation(centering, reference_pdb)
        pd.writePDB(str(output_pdb_folder / f"{reference_name}.pdb"),
                    reference_pdb)

        for idx in range(1, len(self.structures)):
            name = self.structures[idx].name
            mobile_pdb = load_cleaned(name)
            mobile_coords_core = core_alpha_coords(mobile_pdb,
                                                   alignments[name])
            rotation_matrix, translation_matrix = (
                superposition_functions.svd_superimpose(ref_coords_core,
                                                        mobile_coords_core))
            fit = pd.Transformation(rotation_matrix.T, translation_matrix)
            mobile_pdb = pd.applyTransformation(fit, mobile_pdb)
            pd.writePDB(str(output_pdb_folder / f"{name}.pdb"), mobile_pdb)
Beispiel #13
0
def paste_loop(path_to_loop,
               path_to_pdb,
               query_selection_N,
               query_selection_C,
               query_length_N=4,
               query_length_C=4,
               include_sidechains=False):
    """
    Cut a structure and a loop at their best-matching backbone atoms.

    The first ``query_length_N`` and last ``query_length_C`` residues of the
    loop are compared atom by atom (N, C, CA) with the two query selections of
    the structure. On each side the atom pair with the smallest displacement
    becomes the splice point.

    :param path_to_loop: PDB file of the loop
    :param path_to_pdb: PDB file of the structure receiving the loop
    :param query_selection_N: selection string for the N-terminal anchor
    :param query_selection_C: selection string for the C-terminal anchor
    :param query_length_N: number of loop residues compared at the N side
    :param query_length_C: number of loop residues compared at the C side
    :param include_sidechains: keep all atoms of the flanking structure
        pieces instead of backbone only
    :return: ``(pdb_N, loop_slice, pdb_C)`` selections
    :raises ValueError: if an anchor and its loop segment differ in atom
        count; ``'Loop failure'`` is printed first (previously execution
        continued into an UnboundLocalError)
    """
    loop = pr.parsePDB(path_to_loop)
    loop.setSegnames('A')
    loop_bb = loop.select('backbone')
    pdb = pr.parsePDB(path_to_pdb)
    query_N_bb = pdb.select(query_selection_N).select('name N C CA')
    query_C_bb = pdb.select(query_selection_C).select('name N C CA')

    loop_resnums = loop_bb.getResnums()
    first_resnum_loop = loop_resnums[0]
    last_resnum_loop = loop_resnums[-1]
    loop_N_bb = loop_bb.select('name N C CA and resnum `' +
                               str(first_resnum_loop) + 'to' +
                               str(first_resnum_loop + query_length_N - 1) +
                               '`')
    loop_C_bb = loop_bb.select('name N C CA and resnum `' +
                               str(last_resnum_loop - query_length_C + 1) +
                               'to' + str(last_resnum_loop) + '`')

    try:
        coords_diff_N = loop_N_bb.getCoords() - query_N_bb.getCoords()
        coords_diff_C = loop_C_bb.getCoords() - query_C_bb.getCoords()
    except ValueError:
        # Shapes differ: the anchors cannot be paired atom by atom.
        print('Loop failure')
        raise

    # Index of the closest atom pair on each side.
    ind_match_N = np.argmin(np.linalg.norm(coords_diff_N, axis=1))
    ind_match_C = np.argmin(np.linalg.norm(coords_diff_C, axis=1))

    loop_N_bb_index = loop_N_bb.getIndices()[ind_match_N]
    loop_C_bb_index = loop_C_bb.getIndices()[ind_match_C]
    query_N_bb_index = query_N_bb.getIndices()[ind_match_N]
    query_C_bb_index = query_C_bb.getIndices()[ind_match_C]

    pdb_bb_indices = pdb.select('backbone').getIndices()
    first_index_pdb = pdb_bb_indices[0]
    last_index_pdb = pdb_bb_indices[-1]

    def index_range(lo, hi):
        return 'index ' + str(lo) + 'to' + str(hi)

    loop_slice = loop_bb.select(index_range(loop_N_bb_index, loop_C_bb_index))

    prefix = '' if include_sidechains else 'backbone and '
    pdb_N = pdb.select(prefix +
                       index_range(first_index_pdb, query_N_bb_index - 1))
    pdb_C = pdb.select(prefix +
                       index_range(query_C_bb_index + 1, last_index_pdb))
    return pdb_N, loop_slice, pdb_C
    def _import_pdbs(self):
        """
        Turn every protein residue of the processed PDBs into a ``fragment_PDB`` object.

        Each file yielded by ``pdb_check(self.processed_PDBs_dir)`` is parsed
        once. Files that cannot be loaded, contain no protein atoms, or lack
        the ligand named by the second ``_``-separated field of the file name
        are skipped.

        :return: list of ``fragment_PDB`` objects whose ``viable`` attribute
            is not None
        """
        processed_residues = []
        for pdb in pdb_check(self.processed_PDBs_dir):

            # Loading is best effort: report the problem and move on.
            try:
                prody_protein = prody.parsePDB(pdb)
                protein_selection = prody_protein.select(
                    'protein and not hetatm')
                if protein_selection is None:
                    continue
                prody_protein_hv = protein_selection.getHierView()
            except Exception as e:
                print(e)
                continue

            pdb_info = os.path.basename(os.path.normpath(pdb))
            # Select from the structure already in memory instead of
            # parsing the same file a second time.
            prody_ligand = prody_protein.select(
                'hetatm and resname {}'.format(pdb_info.split('_')[1]))

            if prody_ligand is None:
                continue

            processed_residues.extend([
                fragment_PDB(residue, pdb_info, prody_ligand)
                for residue in prody_protein_hv.iterResidues()
            ])

        viable_residues = [
            residue for residue in processed_residues
            if residue.viable is not None
        ]
        print(
            f'Unique processed and viable residues: {len(viable_residues)}'
        )

        return viable_residues
Beispiel #15
0
def get_chain_from_astral_id(astral_id, d):
    """Return the parsed ProDy atoms for an ASTRAL domain.

    ``d`` maps an ASTRAL ID to ``(pdbid, "chain:resnum-range")``; the range
    may be empty, in which case the whole chain is returned.

    Raises ``ValueError`` when the residue range cannot be parsed or carries
    insertion codes that do not translate into a well-defined selection.
    """
    pdbid, chain = d[astral_id]
    assert "," not in chain, f"Issue parsing {astral_id} with chain {chain} and pdbid " \
                             f"{pdbid}."
    chain, resnums = chain.split(":")

    if astral_id == "d4qrye_" or astral_id in ASTRAL_IDS_INCORRECTLY_PARSED:
        chain = "A"
        resnums = ""

    # Handle special case https://github.com/prody/ProDy/issues/1197
    if astral_id == "d1tocr1":
        a = pr.parsePDB("1toc", chain="R")
        # Note there is no 1B
        return a.select("(chain R) and (resnum 2 to 59 or resnum 1A)")

    a = pr.parsePDB(pdbid, chain=chain)
    if resnums == "":
        return a

    # This pattern matches ASTRAL number ranges like 1-100, 1A-100, -1-39, -4--1, etc.
    match = re.match(
        r"((?P<d1>-?\d+)(?P<ic1>\w?))-((?P<d2>-?\d+)(?P<ic2>\w?))", resnums)
    if match is None:
        # Previously this failed later with AttributeError on None.
        raise ValueError(f"Unsupported ASTRAL range {astral_id}.")
    start, start_icode = int(match.group("d1")), match.group("ic1")
    end, end_icode = int(match.group("d2")), match.group("ic2")

    # A lone insertion code, or two different ones, gives no well-defined
    # selection; such domains need a special case above.
    if start_icode != end_icode:
        raise ValueError(f"Unsupported ASTRAL range {astral_id}.")

    # Ranges with negative numbers must be escaped with ` character
    range_str = f"{start} to {end}"
    if start < 0 or end < 0:
        range_str = f"`{range_str}`"

    selection_str = f"resnum {range_str}"
    if start_icode:
        selection_str += f" and icode {start_icode}"

    return a.select(selection_str)
Beispiel #16
0
def fix_openmm():
    """
    Build ``start.pdb`` from ``experimental.pdb`` and ``rosetta.pdb`` and run tleap.

    Steps, all relative to the current working directory:

    1. copy the ATOM records of ``experimental.pdb`` to ``no_smet.pdb``,
       rewriting MSE HETATM records as MET ATOM records (SE becomes SD);
    2. keep the part of the crystal structure that matches the Rosetta model
       and write it to ``chain.pdb``;
    3. run PdbFixer on ``chain.pdb`` (its result is read from ``output.pdb``);
    4. rename disulfide-bonded residues to CYX, write ``start.pdb`` and call
       ``run_tleap``.
    """
    with open('no_smet.pdb', 'w') as outfile:
        with open('experimental.pdb') as infile:
            for line in infile:
                if line.startswith('ATOM'):
                    outfile.write(line)
                elif line.startswith('HETATM') and line[17:20] == 'MSE':
                    # Columns 12-16: the atom name field plus the following
                    # column, compared and replaced as one 5-character unit.
                    atom_name = line[12:17]
                    if atom_name == 'SE   ':
                        atom_name = ' SD  '
                    # The record is cut after column 67, so the trailing
                    # fields of the original line are not carried over.
                    outfile.write('ATOM  ' + line[6:12] + atom_name +
                                  'MET' + line[20:67] + '\n')

    # load the file into prody
    crystal = prody.parsePDB('no_smet.pdb')
    crystal = crystal.select('not hydrogen')

    # get one of the rosetta models
    rosetta_model = prody.parsePDB('rosetta.pdb')

    # perform an alignment to find out what part of the crystal structure
    # corresponds to the rosetta file
    match = prody.matchChains(rosetta_model,
                              crystal,
                              subset='all',
                              overlap=25,
                              pwalign=True)[0][1]
    # Single-argument print() behaves the same on Python 2 and 3.
    print(len(match))
    prody.writePDB('chain.pdb', match)

    # now clean it up with pdb fixer
    subprocess.check_call('python ~/Source/PdbFixer/pdbfixer.py chain.pdb',
                          shell=True)

    # now load it with zam
    p = protein.Protein('output.pdb')
    p.Dehydrogen()
    disulfide_pairs = find_disulfide(p)
    for r1, r2 in disulfide_pairs:
        print('    added disulfide between {} and {}'.format(r1, r2))
        p.Res[r1].FullName = 'CYX'
        p.Res[r2].FullName = 'CYX'
    p.WritePdb('start.pdb')

    # now run tleap
    print('    running tleap')
    run_tleap(disulfide_pairs)
Beispiel #17
0
def prody_contacts(**kwargs):
    """Identify contacts of a target structure with one or more ligands.
    Contacting atoms (or extended subset of atoms, such as residues) are
    outputted in PDB file format.

    :arg target: target PDB identifier or filename

    :arg ligand: ligand PDB identifier(s) or filename(s); a single string is
        treated as one ligand

    :arg select: atom selection string for target structure

    :arg radius: contact radius (Å), default is ``4.0``

    :arg extend: output same ``'residue'``, ``'chain'``, or ``'segment'`` along
        with contacting atoms

    :arg prefix: prefix for output file, default is *target* title

    :arg suffix: output filename suffix, default is ``'_contacts'``; when more
        than one ligand is given the ligand title is appended as well"""

    import prody
    LOGGER = prody.LOGGER

    target = prody.parsePDB(kwargs['target'])
    title = kwargs.get('prefix') or target.getTitle()
    selstr = kwargs.get('select')
    if selstr:
        target = target.select(selstr)
    contacts = prody.Contacts(target)
    suffix = kwargs.get('suffix', '_contacts')
    extend = kwargs.get('extend')
    radius = float(kwargs.get('radius', 4.0))

    ligands = kwargs.get('ligand')
    if isinstance(ligands, str):
        # A bare string would otherwise be iterated character by character.
        ligands = [ligands]
    # Output names only need the ligand title to stay unique.
    several_ligands = len(ligands) > 1

    for pdb in ligands:
        ligand = prody.parsePDB(pdb)
        sel = contacts(radius, ligand)
        if sel:
            LOGGER.info('{0} atoms from {1} contact {2}.'.format(
                len(sel), pdb, str(target)))
            if extend:
                sel = target.select('same ' + extend + ' as sel', sel=sel)
                LOGGER.info('Selection is extended to {0} atoms of the same '
                            '{1}(s).'.format(len(sel), extend))
            pdbfn = title + suffix
            if several_ligands:
                pdbfn += '_' + ligand.getTitle()
            pdbfn += '.pdb'
            LOGGER.info('Writing contacts into ' + pdbfn)
            prody.writePDB(pdbfn, sel)
Beispiel #18
0
def rmsd(bucket, table_idx, param, input_data):
    '''
        Calculate the RMSD of every docked pose against the crystal ligand
        and insert the results into the database.

        Args:
            bucket: passed through to ``db.insert``
            table_idx: int, id of the table receiving the records
            param: dict, parameters
                    {
                        'input_docked_folder':'...',
                        'input_crystal_folder':'...',
                    }
            input_data: list
                    [receptor, chain, resnum, resname]

        Returns:
            None. One record ``input_data + [pose, rmsd, 1, 'success']`` is
            inserted per pose (pose numbers start at 1); on any error a single
            record ``input_data + [1, 0, 0, <error message>]`` is inserted
            instead.
    '''

    try:
        receptor, chain, resnum, resname = input_data
        input_docked_folder = param['input_docked_folder']
        input_crystal_folder = param['input_crystal_folder']
        # Docked and crystal ligand share the file name, not the folder.
        lig_name = '_'.join([receptor, chain, resnum, resname, 'ligand'
                             ]) + '.pdb'

        input_docked_path = os.path.join(data_dir, input_docked_folder,
                                         receptor, lig_name)
        input_crystal_path = os.path.join(data_dir, input_crystal_folder,
                                          receptor, lig_name)

        docked_coords = prody.parsePDB(input_docked_path).getCoordsets()
        crystal_coord = prody.parsePDB(input_crystal_path).getCoords()

        # Squared displacement per atom (sum over x, y, z), then the mean
        # over atoms. The earlier reduction summed over atoms and averaged
        # over the three axes, which scales the result by sqrt(n_atoms / 3).
        # Assumes getCoordsets() is laid out as (poses, atoms, 3).
        sq_displacement = np.sum(np.square(docked_coords - crystal_coord),
                                 axis=-1)
        rmsd_per_pose = np.sqrt(np.mean(sq_displacement, axis=-1))

        records = [
            input_data + [i + 1, rd, 1, 'success']
            for i, rd in enumerate(rmsd_per_pose)
        ]
        db.insert(table_idx, records, bucket=bucket)
    except Exception as e:
        records = [input_data + [1, 0, 0, str(e)]]
        db.insert(table_idx, records, bucket=bucket)
Beispiel #19
0
def prody_contacts(**kwargs):
    """Identify contacts of a target structure with one or more ligands.
    Contacting atoms (or extended subset of atoms, such as residues) are
    outputted in PDB file format.

    :arg target: target PDB identifier or filename

    :arg ligand: ligand PDB identifier(s) or filename(s); a single string is
        treated as one ligand

    :arg select: atom selection string for target structure

    :arg radius: contact radius (Å), default is ``4.0``

    :arg extend: output same ``'residue'``, ``'chain'``, or ``'segment'`` along
        with contacting atoms

    :arg prefix: prefix for output file, default is *target* title

    :arg suffix: output filename suffix, default is ``'_contacts'``; when more
        than one ligand is given the ligand title is appended as well"""

    import prody
    LOGGER = prody.LOGGER

    target = prody.parsePDB(kwargs['target'])
    title = kwargs.get('prefix') or target.getTitle()
    selstr = kwargs.get('select')
    if selstr:
        target = target.select(selstr)
    contacts = prody.Contacts(target)
    suffix = kwargs.get('suffix', '_contacts')
    extend = kwargs.get('extend')
    radius = float(kwargs.get('radius', 4.0))

    ligands = kwargs.get('ligand')
    if isinstance(ligands, str):
        # A bare string would otherwise be iterated character by character.
        ligands = [ligands]
    # Output names only need the ligand title to stay unique.
    several_ligands = len(ligands) > 1

    for pdb in ligands:
        ligand = prody.parsePDB(pdb)
        sel = contacts(radius, ligand)
        if not sel:
            continue
        LOGGER.info('{0} atoms from {1} contact {2}.'
                    .format(len(sel), pdb, str(target)))
        if extend:
            sel = target.select('same ' + extend + ' as sel', sel=sel)
            LOGGER.info('Selection is extended to {0} atoms of the same '
                        '{1}(s).'.format(len(sel), extend))
        pdbfn = title + suffix
        if several_ligands:
            pdbfn += '_' + ligand.getTitle()
        pdbfn += '.pdb'
        LOGGER.info('Writing contacts into ' + pdbfn)
        prody.writePDB(pdbfn, sel)
Beispiel #20
0
 def Resid(self, pdbFile, pdbChain1, pdbChain2, ligandName, runfolder):
     """
     Find the residues of the `pdbChain1` selection within 5 angstroms of a partner.

     The partner is chain `pdbChain2` when that is non-empty; otherwise it is
     the ligand read from `<ligandName>.acpype/<ligandName>_NEW.pdb`. The
     sorted, unique residue numbers are also written space-separated to
     `cutoff-resid-5angstroms` in `runfolder`, which becomes the working
     directory.

     Returns `(residList, path)` with `residList` a numpy array of residue
     numbers as strings (empty when nothing is within range), or None when
     both `pdbChain2` and `ligandName` are empty.
     """
     # Single-argument print() behaves the same on Python 2 and 3.
     print(self.runfolder)
     os.chdir(runfolder)

     if pdbChain2 != '':
         structure = prody.parsePDB(str(pdbFile))
         partner = 'chain ' + pdbChain2
     elif ligandName != '':
         receptor = prody.parsePDB(str(pdbFile))
         ligand = prody.parsePDB(str(ligandName) + '.acpype/' + str(ligandName) + '_NEW.pdb')
         # Merge receptor and ligand so one selection can see both.
         structure = receptor + ligand
         resnames = sorted(set(ligand.getResnames()))
         # Several names go after one `resname` keyword; chaining them with
         # `or` would escape the preceding `and` in the selection below.
         partner = 'resname ' + ' '.join(resnames)
     else:
         return None

     nearby = structure.select('(' + pdbChain1 + ')' + ' and within 5 of ' + partner)
     # select() gives None when no atom matches; report that as an empty list.
     resnums = sorted(set(nearby.getResnums())) if nearby is not None else []
     residList = np.array(resnums, dtype='str')

     with open('cutoff-resid-5angstroms', 'w') as residfile:
         residfile.write(" ".join(residList))
     return residList, str(runfolder + '/cutoff-resid-5angstroms')
Beispiel #21
0
def rmsd(reorder_outpath, dock_outpath, init='rmsd_init'):
    """Compute the RMSD of each docked coordinate set against the crystal ligand.

    Args:
        reorder_outpath: path of the reordered crystal ligand PDB, relative
            to ``init.data_dir``.
        dock_outpath: path of the docking result PDB, relative to
            ``init.data_dir``; every coordinate set in it is scored.
        init: name (as a string) of an object in scope that exposes a
            ``data_dir`` attribute.

    Returns:
        A one-element list wrapping the list of RMSD values, one per docked
        coordinate set.
    """
    # NOTE(review): eval() executes whatever string the caller passes in;
    # only trusted names must ever reach this parameter.
    init = eval(init)
    reorder_path = os.path.join(init.data_dir, reorder_outpath)
    dock_path = os.path.join(init.data_dir, dock_outpath)

    docked_coords = prody.parsePDB(dock_path).getCoordsets()
    crystal_coords = prody.parsePDB(reorder_path).getCoords()

    # Squared displacement of every atom (summed over x, y, z), then the mean
    # over atoms: sqrt(1/N * sum_i |d_i|^2). The crystal coordinates broadcast
    # against each docked coordinate set.
    squared_dist = np.sum(np.square(docked_coords - crystal_coords), axis=-1)
    pose_rmsd = np.sqrt(np.mean(squared_dist, axis=-1))

    return [list(pose_rmsd)]
Beispiel #22
0
def overlap(reorder_path, dock_path):
    """Return, per docked coordinate set, the fraction of clashing atoms.

    A docked atom counts as clashing when at least one atom of the crystal
    ligand lies closer than the module-level ``clash_cutoff_A``.

    Args:
        reorder_path: path of the crystal ligand PDB.
        dock_path: path of the docking result PDB.

    Returns:
        A one-element list wrapping the list of clash ratios.
    """
    docked_xyz = prody.parsePDB(dock_path).getCoordsets()
    crystal_xyz = prody.parsePDB(reorder_path).getCoords()

    # Insert an axis so every docked atom is paired with every crystal atom.
    delta = docked_xyz[..., np.newaxis, :] - crystal_xyz
    pair_dist = np.sqrt((delta ** 2).sum(axis=-1))

    clashing_pairs = (pair_dist < clash_cutoff_A).astype(float)
    atom_has_clash = (clashing_pairs.sum(axis=-1) > 0).astype(float)
    clash_ratio = atom_has_clash.mean(axis=-1)

    return [list(clash_ratio)]
Beispiel #23
0
def native_contact(rec_path, reorder_path, dock_path):
    """
    Compute, for every docked coordinate set, the fraction of the crystal
    ligand-receptor contacts that the docked ligand reproduces.

    Hydrogens are dropped from all three structures. A contact is a
    ligand-atom / receptor-atom pair closer than the module-level
    ``distance_threshold``.

    args:
        rec_path:: str
            path of the split receptor

        reorder_path:: str
            path of the reordered (crystal) ligand

        dock_path:: str
            path of the docking result

    returns:
        native_contact:: list
            one-element list wrapping the list of contact ratios, one per
            docked coordinate set
    """
    heavy = 'not hydrogen'
    docked = prody.parsePDB(dock_path).select(heavy)
    crystal = prody.parsePDB(reorder_path).select(heavy)
    receptor = prody.parsePDB(rec_path).select(heavy)

    # Contacts are compared atom by atom, so both ligands must line up.
    assert crystal.numAtoms() == docked.numAtoms()

    docked_xyz = docked.getCoordsets()
    crystal_xyz = crystal.getCoords()
    receptor_xyz = receptor.getCoords()

    def contact_mask(ligand_xyz):
        # 1 where a ligand atom / receptor atom pair is within the threshold.
        gap = np.expand_dims(ligand_xyz, -2) - receptor_xyz
        pair_dist = np.sqrt(np.sum(np.square(gap), axis=-1))
        return (pair_dist < distance_threshold).astype(int)

    crystal_mask = contact_mask(crystal_xyz)
    docked_mask = contact_mask(docked_xyz)

    native_total = np.sum(crystal_mask).astype(float)
    shared = np.sum(crystal_mask * docked_mask, axis=(-1, -2))

    return [list(shared / native_total)]
Beispiel #24
0
def clean_pair(bound_pdb, bound_chains, peptide_chains, unbound_pdb, unbound_chains):
    """Split a bound complex and superpose the unbound receptor onto it.

    Writes four files into the current directory: ``b.pdb`` (bound receptor
    chains), ``p.pdb`` (peptide), ``unb.pdb`` (aligned unbound receptor) and
    ``up.pdb`` (aligned unbound receptor combined with the bound peptide).
    Always returns 0.
    """
    bound = parsePDB(bound_pdb, chain=bound_chains+peptide_chains)
    receptor_query = 'protein and chain %s' % ' '.join(list(bound_chains))
    peptide_query = 'protein and chain %s' % peptide_chains

    # First dump of the bound parts, before any alignment is attempted.
    writePDB('b.pdb', bound.select(receptor_query))
    writePDB('p.pdb', bound.select(peptide_query))

    unbound = parsePDB(unbound_pdb, chain=unbound_chains)
    matched = compare.matchAlign(unbound, bound)
    unbound = matched[0]

    writePDB('unb.pdb', unbound.select('protein'))
    writePDB('b.pdb', bound.select(receptor_query))
    writePDB('p.pdb', bound.select(peptide_query))
    writePDB('up.pdb', unbound.select('protein') | bound.select(peptide_query))
    return 0
Beispiel #25
0
def align_by_resid(input_pdb_path, target_pdb_path):
    """Restrict the input structure to residues numbered as in the target.

    Args:
        input_pdb_path: path of the PDB file to be filtered.
        target_pdb_path: path of the PDB file supplying the residue numbers.

    Returns:
        ``(input_mol, target_index, native_index)`` where ``input_mol`` is
        the filtered selection, ``target_index`` holds the positions in the
        input C-alpha list whose residue number occurs in the target, and
        ``native_index`` holds the positions in the target C-alpha list whose
        residue number occurs in the input. ``(None, None, None)`` is returned
        when either structure has fewer than 25 CA atoms (including none at
        all) or fewer than 25 residue numbers match.
    """
    input_mol = parsePDB(input_pdb_path)
    target_mol = parsePDB(target_pdb_path)

    selections = (target_mol.select('calpha'), input_mol.select('calpha'),
                  input_mol.select('name CA'), target_mol.select('name CA'))
    # select() yields None when nothing matches; that is just the extreme
    # case of "too few CA atoms", so report it the same way.
    if any(sel is None for sel in selections):
        return None, None, None
    target_calpha, input_calpha, input_ca, target_ca = selections

    target_resid = target_calpha.getResnums()
    input_resid = input_calpha.getResnums()
    target_index = np.where(np.in1d(input_resid, target_resid))[0]
    native_index = np.where(np.in1d(target_resid, input_resid))[0]

    if (len(input_ca.getSequence()) < 25
            or len(target_ca.getSequence()) < 25
            or len(target_index) < 25):
        return None, None, None

    # NOTE(review): positions within the C-alpha list are used as 'resindex'
    # values; confirm every residue of the input carries exactly one CA.
    input_mol = input_mol.select(
        'resindex ' + ' '.join(map(str, target_index)))
    return input_mol, target_index, native_index
Beispiel #26
0
def compare_pdb_files(file1, file2):
    """Superpose the structure in file1 onto file2 and return their RMSD.

    Args:
        file1 (str): Path to the PDB file that gets moved.
        file2 (str): Path to the reference PDB file; must hold the same
            protein as file1.

    Returns:
        float: Root Mean Squared Deviation (RMSD) after superposition.
    """
    mobile = pr.parsePDB(file1)
    reference = pr.parsePDB(file2)
    superposed = pr.calcTransformation(mobile, reference).apply(mobile)
    return pr.calcRMSD(superposed, reference)
Beispiel #27
0
def prody_align(opt):
    """Align the models of one PDB file, or several PDB files onto the first.

    With a single entry in ``opt.pdb`` the coordinate sets of that file are
    superposed using the atoms matched by ``opt.select`` and the result is
    written to ``<prefix>.pdb``. With several entries the first one is the
    reference (it is popped from ``opt.pdb``) and every other file is aligned
    onto it and written to ``<title>_aligned.pdb``.
    """

    import prody
    log = prody.LOGGER

    filenames = opt.pdb
    if len(filenames) == 1:
        source = filenames[0]
        log.info('Aligning multiple models in: ' + source)
        selstr = opt.select
        prefix = opt.prefix
        model = opt.model
        structure = prody.parsePDB(source)
        selected = structure.select(selstr)
        if selected is None:
            opt.subparser.error('Selection {0:s} do not match any atoms.'
                               .format(repr(selstr)))
        log.info('{0:d} atoms will be used for alignment.'
                 .format(len(selected)))
        # The chosen model (1-based on the command line) is the reference.
        selected.setACSIndex(model-1)
        prody.printRMSD(selected, msg='Before alignment ')
        prody.alignCoordsets(selected)
        prody.printRMSD(selected, msg='After alignment  ')
        if prefix == '':
            prefix = structure.getTitle() + '_aligned'
        outfn = prefix + '.pdb'
        log.info('Writing file: ' + outfn)
        prody.writePDB(outfn, structure)
        return

    reffn = filenames.pop(0)
    seqid = opt.seqid
    overlap = opt.overlap
    log.info('Aligning structures onto: ' + reffn)
    ref = prody.parsePDB(reffn)
    for arg in filenames:
        # Skip the reference itself and outputs of an earlier run.
        if arg == reffn or '_aligned.pdb' in arg:
            continue
        mobile = prody.parsePDB(arg)
        result = prody.matchAlign(mobile, ref, seqid=seqid, overlap=overlap,
                                  tarsel=opt.select, allcsets=True,
                                  cslabel='Model', csincr=1)
        if not result:
            log.warning('Failed to align ' + arg)
            continue
        outfn = mobile.getTitle() + '_aligned.pdb'
        log.info('Writing file: ' + outfn)
        prody.writePDB(outfn, mobile)
Beispiel #28
0
def calc_pocket_rmsd(rec, lig, root):
    """
    Calculate difference between the ligand reference receptor and
    the receptor it is being docked into.

    The pocket is every protein residue of ``rec`` with an atom within 3.5 of
    any ligand atom. The ligand's own receptor file name is derived from
    ``lig`` by replacing "LIG_aligned.sdf" with "PRO.pdb".

    :param rec: receptor PDB file name, relative to ``root``
    :param lig: ligand SDF file name, relative to ``root``
    :param root: directory holding the files
    :return: (minrmsd, minbackrmsd) - the smallest pocket RMSD over all chain
             matchings and the C-alpha RMSD of that same matching; both are
             ``np.inf`` when no matching could be evaluated

    From original script by David Koes
    """
    ligrec = lig.replace("LIG_aligned.sdf", "PRO.pdb")
    rec = prody.parsePDB(os.path.join(root, rec))
    ligrec = prody.parsePDB(os.path.join(root, ligrec))
    lig = next(pybel.readfile("sdf", os.path.join(root, lig)))
    c = np.array([a.coords for a in lig.atoms])
    nearby = rec.select("protein and same residue as within 3.5 of point",
                        point=c)
    matches = []
    for cutoff in range(90, 0, -10):
        # can't just set a low cutoff since we'll end up with bad alignments
        # try a whole bunch of alignments to maximize the likelihood we get the right one
        m = prody.matchChains(rec,
                              ligrec,
                              subset="all",
                              overlap=cutoff,
                              seqid=cutoff,
                              pwalign=True)
        if m:
            matches.extend(m)

    minrmsd = np.inf
    minbackrmsd = np.inf
    if nearby is None:
        # No receptor atom lies near the ligand, so there is no pocket to
        # compare.
        return minrmsd, minbackrmsd

    # Invariant across matchings, so build the lookup set once.
    closeatoms = set(nearby.getIndices())
    for rmap, lrmap, _, _ in matches:
        try:
            lrindices = lrmap.getIndices()
            lratoms = []
            ratoms = []
            for i, idx in enumerate(rmap.getIndices()):
                if idx in closeatoms:
                    lratoms.append(lrindices[i])
                    ratoms.append(idx)
            if not lratoms:
                continue
            rmsd = prody.calcRMSD(rec[ratoms], ligrec[lratoms])
            backrmsd = prody.calcRMSD(rec[ratoms] & rec.ca,
                                      ligrec[lratoms] & ligrec.ca)
        except Exception:
            # Best effort: a matching that cannot be scored is skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            continue
        if rmsd < minrmsd:
            minrmsd = rmsd
            minbackrmsd = backrmsd
    return minrmsd, minbackrmsd
Beispiel #29
0
def reorder(bucket, table_idx, param,
            input_data):  # todo(maksym) smina_reorder
    """Run smina on one ligand/receptor pair and record the outcome in the db.

    Args:
        bucket: forwarded to ``db.insert``.
        table_idx: table identifier; also prefixes the output folder name.
        param: dict with the keys 'output_folder', 'input_ligand_folder',
            'input_receptor_folder' and 'smina_param'.
        input_data: list ``[receptor, chain, resnum, resname]`` of strings;
            joined with '_' to build the ligand/receptor file names.

    A record ``input_data + [1, 'success']`` is inserted when the smina
    output can be parsed, otherwise ``input_data + [0, <error text>]``.
    """
    try:
        receptor, chain, resnum, resname = input_data

        output_folder = param['output_folder']
        output_folder = '{}_{}'.format(table_idx, output_folder)
        input_lig_folder = param['input_ligand_folder']
        input_rec_folder = param['input_receptor_folder']
        smina_pm = smina_param()
        smina_pm.param_load(param['smina_param'])

        out_dir = os.path.join(data_dir, output_folder, receptor)
        _makedir(out_dir)
        out_name = '_'.join(input_data + ['ligand']) + '.pdb'
        out_path = os.path.join(out_dir, out_name)

        input_lig_dir = os.path.join(data_dir, input_lig_folder,
                                     receptor)  # lig_dir = input_lig_dir
        lig_name = '_'.join(input_data + ['ligand']) + '.pdb'
        input_lig_path = os.path.join(input_lig_dir, lig_name)

        input_rec_dir = os.path.join(data_dir, input_rec_folder,
                                     receptor)  # rec_dir = input_rec_dir
        rec_name = '_'.join(input_data + ['receptor']) + '.pdb'
        input_rec_path = os.path.join(input_rec_dir, rec_name)

        kw = {
            'receptor': input_rec_path,
            'ligand': input_lig_path,
            'autobox_ligand': input_lig_path,
            'out': out_path
        }

        cmd = smina_pm.make_command(**kw)  # todo(maksym) smina_cmd

        cl = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        # communicate() drains the stdout pipe while waiting; a bare wait()
        # can block forever once the child fills the pipe buffer.
        cl.communicate()
        # Parsing the output doubles as the success check: a missing or
        # unreadable file raises and is recorded as a failure below.
        prody.parsePDB(out_path)

        record = input_data + [1, 'success']
        records = [record]
        db.insert(table_idx, records, bucket=bucket)

    except Exception as e:
        record = input_data + [0, str(e)]
        records = [record]
        db.insert(table_idx, records, bucket=bucket)
def preprocess_single(model, chain_name):
    """Put every chain under one chain ID and renumber residues and atoms.

    The model is written to ``tmp.pdb`` in the current directory, re-parsed,
    and the file is removed again. Residues are then numbered from 1 and atom
    serials from 1, across all chains.

    Returns the re-parsed, renumbered model (not the object passed in).
    """
    for chain in model.getHierView().iterChains():
        chain.setChids(chain_name)

    # Round-trip through a PDB file so the renumbering below works on a
    # freshly parsed structure.
    scratch = "tmp.pdb"
    prody.writePDB(scratch, model)
    model = prody.parsePDB(scratch)
    check_call(["rm", scratch])

    next_resnum = 1
    next_serial = 1

    hierarchy = model.getHierView()
    for chain in hierarchy.iterChains():
        for residue in chain.iterResidues():
            residue.setResnums(next_resnum)
            for atom in residue:
                atom.setSerial(next_serial)
                next_serial += 1
            # A residue carrying an insertion code does not advance the
            # counter, so the following residue receives the same number.
            if len(residue.getIcode()) == 0:
                next_resnum += 1
    hierarchy.update()
    return model
Beispiel #31
0
    def __init__(self, pdbid, cfg, ):
        """Load the receptor for ``pdbid`` and derive its residue sets.

        Trains the classifier from ``cfg``, parses the bound or unbound
        receptor file (hydrogen-free protein atoms), looks up the ddG values
        and classifier confidences stored for ``pdbid`` and builds the
        surface / binding / positive residue number sets plus the matching
        receptor selections.
        """
        self.pdbid = pdbid

        self.config = cfg
        self.svm = config.trainClassifier(self.config)

        if self.config.testing.is_bound:
            fn_pattern = BOUND_FILENAME_PATTERN
        else:
            fn_pattern = UNBOUND_FILENAME_PATTERN
        self.pdb_filename = fn_pattern.format(pdb=self.pdbid)
        self.receptor = prody.parsePDB(self.pdb_filename).protein.noh

        self.ddgs = self.config.testing.label_data_df.ix[self.pdbid]
        predictions = config.predictClassifier(self.config)
        all_confidences = pd.Series(
                data=predictions,
                index=self.config.testing.label_data_df.index,
                )
        self.confidence = all_confidences.ix[self.pdbid]

        # Boolean masks over the residues listed for this structure.
        self.mask_binding = self.ddgs > self.config.testing.ddg_cutoff
        self.mask_positive = self.confidence > 0

        self.surface_resnums = self.ddgs.index
        self.binding_resnums = self.surface_resnums[self.mask_binding]
        self.positive_resnums = self.surface_resnums[self.mask_positive]

        def select_by_resnum(resnums):
            query = 'resnum %s' % ' '.join(map(str, resnums))
            return self.receptor.select(query)

        self.surface_residues = select_by_resnum(self.surface_resnums)
        self.positive_residues = select_by_resnum(self.positive_resnums)
Beispiel #32
0
def phi2pdb(base_pdb, phi, save_path="./"):
    """Write the values of a phi file into the B-factor column of a PDB.

    Args:
        base_pdb: path of the PDB file supplying the atoms.
        phi: path of a text file with one ``x,y,z,k`` record per line.
        save_path: directory for the output file.

    Every atom's beta is reset to 0.0; each record then assigns ``k`` to the
    first not-yet-assigned atom whose coordinates equal ``x, y, z`` exactly.
    Records without a matching atom are ignored.

    Returns:
        Path of the written PDB: the phi file's base name with its last four
        characters replaced by ".pdb", inside ``save_path``.
    """
    pdb = prody.parsePDB(base_pdb)

    # Bucket atoms by exact coordinates (file order preserved inside a
    # bucket) so each record is a dictionary lookup instead of a scan over
    # all remaining atoms.
    unassigned = {}
    for atom in pdb:
        atom.setBeta(0.0)
        X, Y, Z = atom.getCoords()
        unassigned.setdefault((float(X), float(Y), float(Z)), []).append(atom)

    with open(phi, "r") as phif:
        for line in phif:
            x, y, z, k = (float(field) for field in line.strip().split(","))
            bucket = unassigned.get((x, y, z))
            if bucket:
                # Consume the atom so a repeated coordinate goes to the next
                # atom sharing it.
                bucket.pop(0).setBeta(k)

    out_pdb = os.path.join(save_path, phi.split(os.sep)[-1][:-4]+".pdb")
    prody.writePDB(out_pdb, pdb)

    return out_pdb
Beispiel #33
0
def getPairInformation(pdbid,
                       reference_chain,
                       pair_chain,
                       cutoff=5,
                       covalent_bond_cutoff=5):
    """
    Describe the interface between two chains that involves a cysteine.

    1. parses the structure given by pdbid
    2. selects the atoms of each chain lying within cutoff of the other chain
    3. collects atom pairs within covalent_bond_cutoff whose reference-chain
       atom belongs to a CYS residue

    reference == oncogene,
    pair == peptide

    :return: None when no CYS atom of the reference chain is close to the
             pair chain, otherwise the tuple (pdbid, reference_chain,
             reference interface resnums, pair_chain, pair interface resnums,
             [(reference atom serial, pair atom serial), ...])
    """
    atoms = prody.parsePDB(pdbid)  # TODO: turn off debug
    reference_atoms = atoms.select("chain %s and not water" % reference_chain)
    pair_atoms = atoms.select("chain %s and not water" % pair_chain)
    # next try to select everything
    ref_contacts = prody.measure.contacts.Contacts(reference_atoms)
    ref_selection = ref_contacts.select(cutoff,
                                        pair_atoms)  # we need these atoms

    pair_contacts = prody.measure.contacts.Contacts(pair_atoms)
    pair_selection = pair_contacts.select(cutoff, reference_atoms)  # and these
    sulfur_pairs = []
    ## 1. select Cys atoms on oncogene
    for (r, ch2, distance) in prody.measure.contacts.findNeighbors(
            reference_atoms, covalent_bond_cutoff, pair_atoms):
        # Equality, not substring membership: "in 'CYS'" would also accept
        # residue names such as 'C' or 'CY'.
        if r.getResname() == 'CYS':  # and r.getElement() in ['S'] :
            sulfur_pairs.append((r.getSerial(), ch2.getSerial()))
    # filtering: if there is no Cys, return nothing
    if len(sulfur_pairs) < 1:
        return None
    return (pdbid, reference_chain, set(ref_selection.getResnums()),
            pair_chain, set(pair_selection.getResnums()), sulfur_pairs)
Beispiel #34
0
 def __init__(self, pdbid, context='bound'):
     """Load the receptor of one entry and prepare an empty per-residue table.

     ``pdbid`` is stored upper-cased; ``context`` picks the data sub-tree
     ('bound' additionally uses the 'mainChain' folder). The first chain of
     the protein becomes ``receptor_chain`` and ``df`` is an empty DataFrame
     indexed by (PDB identifier, residue number) of that chain's C-alphas.
     """
     self.pdbid = pdbid.upper()
     self.context = context
     bound = context == 'bound'

     self.DATA_PATH = j(PEPTIDB_DATA_PATH, self.context)
     self.PDB_DATA_PATH = j(self.DATA_PATH, self.context+'Set', 'mainChain' if bound else '')
     self.FTMAP_DATA_PATH = j(self.DATA_PATH, 'FTMapAnalysis', 'ftmapData')
     self.CONSURF_DATA_PATH = j(self.DATA_PATH, 'ConSurfAnalysis', 'data')

     self.receptor_filename = j(self.PDB_DATA_PATH, '%s.pdb' % self.pdbid)
     self.receptor_atoms = prody.parsePDB(self.receptor_filename).protein

     # next() works on Python 2 and 3; the .next() method is Python 2 only.
     self.receptor_chain = next(self.receptor_atoms.getHierView().iterChains())
     # NOTE(review): the index uses pdbid exactly as passed in, not the
     # upper-cased self.pdbid - confirm this is intended.
     index_pairs = list(zip(
         [pdbid]*self.receptor_chain.numResidues(),
         self.receptor_chain.ca.getResnums()
     ))
     self.resnum_index = pd.MultiIndex.from_tuples(
         index_pairs,
         names=['PDB identifier', 'Residue number'],
     )
     self.df = pd.DataFrame(index=self.resnum_index)
     print(self.pdbid)
Beispiel #35
0
def prody_biomol(pdbname,**kwargs):
    """Build biomolecule coordinates and write them to PDB files.

    :arg pdbname: :term:`PDB` identifier or filename, passed to parsePDB

    :arg prefix: keyword, prefix for output files; the title of the parsed
        structure is used when it is missing or empty

    :arg biomol: keyword, forwarded to buildBiomolecules as *biomol*"""

    import prody
    log = prody.LOGGER
    prefix = kwargs.get('prefix',None)
    biomol = kwargs.get('biomol')
    structure, header = prody.parsePDB(pdbname, header=True)
    if not prefix:
        prefix = structure.getTitle()

    assemblies = prody.buildBiomolecules(header, structure, biomol=biomol)
    if not isinstance(assemblies, list):
        assemblies = [assemblies]

    # Output files are numbered from 1.
    for number, assembly in enumerate(assemblies, 1):
        if isinstance(assembly, prody.Atomic):
            outfn = '{0:s}_biomol_{1:d}.pdb'.format(prefix, number)
            log.info('Writing {0:s}'.format(outfn))
            prody.writePDB(outfn, assembly)
        elif isinstance(assembly, tuple):
            # An assembly delivered in several parts gets one file per part.
            for part_number, part in enumerate(assembly, 1):
                outfn = ('{0:s}_biomol_{1:d}_part_{2:d}.pdb'
                         .format(prefix, number, part_number))
                log.info('Writing {0:s}'.format(outfn))
                prody.writePDB(outfn, part)
Beispiel #36
0
def main():
    """Tabulate per-atom ASA values for a range of sphere point counts.

    Parses 'dimers/1R0R.pdb', calls ``calculate_asa_np`` with every point
    count in ``range(10, 2000, 10)``, prints the summed area per count and
    writes one CSV row of per-atom values per count to
    'perturbation_analysis.csv'.

    Adapted from the command-line tool whose usage text read:

    Copyright (c) 2007 Bosco Ho

    Calculates the total Accessible Surface Area (ASA) of atoms in a
    PDB file.

    Usage: asa.py -s n_sphere in_pdb [out_pdb]

    - out_pdb    PDB file in which the atomic ASA values are written
                 to the b-factor column.

    -s n_sphere  number of points used in generating the spherical
                 dot-density for the calculation (default=960). The
                 more points, the more accurate (but slower) the
                 calculation.
    """
    import csv
    import prody as pr

    pdb = pr.parsePDB('dimers/1R0R.pdb')

    data = []
    for n in range(10, 2000, 10):
        # NOTE(review): 1.4 is presumably the probe radius - confirm against
        # calculate_asa_np.
        asas = calculate_asa_np(pdb, 1.4, n)
        data.append(asas)
        print(str(n) + ", " + str(sum(asas)) + " angstrom squared.")

    # The with-block guarantees the rows are flushed and the handle closed.
    with open('perturbation_analysis.csv', 'w') as f_test:
        csv.writer(f_test).writerows(data)
def computeEachAtomAllTrajectoriesMean(trajectories):
    """
        Computes the mean of each atom's position in all the trajectories.

        Every trajectory is reduced to its C-alpha atoms and its frames from
        INITIAL_FRAME onwards are superposed onto the first frame before the
        coordinates are accumulated.
    """
    perTrajectoryR = []
    perTrajectoryMeasures = []

    for traj in trajectories:
        calphas = prody.parsePDB(traj, subset='calpha')
        frames = calphas.getCoordsets()

        superposed = prody.PDBEnsemble("Complex")
        superposed.setAtoms(calphas)
        superposed.addCoordset(frames[INITIAL_FRAME:])
        superposed.setCoords(frames[0])  # first frame is the reference
        superposed.superpose()

        sri, sriMeasures = addTrajectoryCoordinates(
            superposed.getCoordsets(), calphas.numAtoms())
        perTrajectoryR.append(sri)
        perTrajectoryMeasures.append(sriMeasures)

    return average(perTrajectoryR, perTrajectoryMeasures)
def computeEachAtomsUnnormalisedAutocorrelation(trajectories, avgR):
    r"""
        Computes the unnormalised autocorrelation over all trajectories:
            C(k) = 1/(n-k) \sum_{t=1}^{n-k} (Xt - mu)(Xt+k - mu)
        To normalise it within [-1:1]
            c(k) = C(k) / var
        When the true mean \mu and variance \sigma^2 are known, this estimate is unbiased.

        Each trajectory is reduced to its C-alpha atoms and superposed onto
        its first frame; the per-trajectory results are summed and divided by
        the summed measure counts.
    """
    perTrajectoryRirj = []
    perTrajectoryMeasures = []

    for traj in trajectories:
        calphas = prody.parsePDB(traj, subset='calpha')
        frames = calphas.getCoordsets()

        # superpose every frame onto the first one
        superposed = prody.PDBEnsemble("Complex")
        superposed.setAtoms(calphas)
        superposed.addCoordset(frames[INITIAL_FRAME:])
        superposed.setCoords(frames[0])  # reference
        superposed.superpose()

        srirj, sMeasures = computeEachAtomsUnnormalisedAutocorrelationForASingleTrajectory(
            superposed.getCoordsets(), avgR)
        perTrajectoryRirj.append(srirj)
        perTrajectoryMeasures.append(sMeasures)

    totalRirj = sumOverTrajectories(perTrajectoryRirj)
    totalMeasures = sumOverTrajectories(perTrajectoryMeasures)

    return totalRirj/totalMeasures
def find_close(native_name, traj_name, skip_frames):
    """Return an RMSD built from each atom's closest approach to the native.

    The trajectory frames after the first ``skip_frames`` are superposed onto
    the native structure. For every atom the smallest squared deviation over
    all frames is taken, and the square root of the mean of those minima is
    returned.
    """
    native = prody.parsePDB(native_name)
    traj = prody.parsePDB(traj_name)

    ens = prody.Ensemble('ensemble')
    ens.setCoords(native.getCoords())
    # drop the leading skip_frames frames
    ens.addCoordset(traj.getCoordsets()[skip_frames:, ...])
    ens.superpose()

    reference_xyz = native.getCoords()
    frame_xyz = ens.getCoordsets()

    # squared deviation per frame and atom (summed over x, y, z)
    sq_dev = numpy.sum((frame_xyz - reference_xyz) ** 2, axis=2)
    closest = numpy.min(sq_dev, axis=0)
    return numpy.sqrt(numpy.sum(closest) / float(closest.shape[0]))
    def generate_neighborhood_atom_list(input_pdbs, neighbors, acceptable_atoms_wt_set, acceptable_atoms_mut_set, input_type):
        """Select the accepted heavy atoms of the neighbour residues per PDB.

        Files whose name contains 'WT.' are skipped. For every other file the
        residues ``(neighbor[1], neighbor[0][1])`` are looked up and each
        non-hydrogen atom is kept when its (chain, resname, resnum, name) key
        is in ``acceptable_atoms_mut_set`` (``input_type == 'Mutant PDB'``,
        resnum cast to int) or in ``acceptable_atoms_wt_set`` (any other
        ``input_type``).

        Returns a list with one atom selection per processed file.
        """
        mutant_numbering = input_type == 'Mutant PDB'
        selections = []
        for pdb_path in input_pdbs:
            if 'WT.' in pdb_path:
                continue

            structure = prody.parsePDB(pdb_path)
            hier = structure.getHierView()
            residues = [hier[nb[1], nb[0][1]] for nb in neighbors]

            kept_indices = []
            for residue in residues:
                for atom in residue:
                    # Mutant numbering is checked against the mutant set,
                    # everything else against the wild-type set.
                    if mutant_numbering:
                        key = (residue.getChid(), residue.getResname(),
                               int(residue.getResnum()), atom.getName())
                        accepted = key in acceptable_atoms_mut_set
                    else:
                        key = (residue.getChid(), residue.getResname(),
                               residue.getResnum(), atom.getName())
                        accepted = key in acceptable_atoms_wt_set
                    if accepted and atom.getElement() != 'H':
                        kept_indices.append(str(atom.getIndex()))

            selections.append(structure.select('index ' + ' '.join(kept_indices)))

        return selections
    def generate_point_atom_list(input_pdbs, mutations, acceptable_atoms_wt_set, acceptable_atoms_mut_set, mut_key_dict, input_type):
        """Select the accepted heavy atoms of each mutated residue per PDB.

        For every mutation the residue ``(mutation[1], int(mutation[0]))`` is
        looked up in each file whose name does not contain 'WT.'.
        Non-hydrogen atoms are kept when their (chain, resname, resnum, name)
        key is in ``acceptable_atoms_mut_set`` ('Mutant PDB', resnum cast to
        int) or in ``acceptable_atoms_wt_set`` ('RosettaOut').

        Returns a dict mapping the mutation key to the list of selections,
        one per processed file. The key is ``mutation[1] + str(mutation[0])``
        for 'RosettaOut' and that string translated through ``mut_key_dict``
        for 'Mutant PDB'; other input types add no entry.
        """
        mutation_dict = {}

        for mutation in mutations:
            per_file = []
            for pdb_path in input_pdbs:
                if 'WT.' in pdb_path:
                    continue

                structure = prody.parsePDB(pdb_path)
                hier = structure.getHierView()
                residue = hier[mutation[1], int(mutation[0])]

                kept_indices = []
                for atom in residue:
                    if input_type == 'Mutant PDB':
                        key = (residue.getChid(), residue.getResname(),
                               int(residue.getResnum()), atom.getName())
                        if key in acceptable_atoms_mut_set and atom.getElement() != 'H':
                            kept_indices.append(str(atom.getIndex()))
                    elif input_type == 'RosettaOut':
                        key = (residue.getChid(), residue.getResname(),
                               residue.getResnum(), atom.getName())
                        if key in acceptable_atoms_wt_set and atom.getElement() != 'H':
                            kept_indices.append(str(atom.getIndex()))

                per_file.append(structure.select('index ' + ' '.join(kept_indices)))

            label = mutation[1] + str(mutation[0])
            if input_type == 'Mutant PDB':
                mutation_dict[mut_key_dict[label]] = per_file
            elif input_type == 'RosettaOut':
                mutation_dict[label] = per_file
        return mutation_dict
Beispiel #42
0
def runThrough(pfile):
    """Process one PDB file and summarise the module-level COLUMN_* data.

    Parses chain A of ``pfile`` (model number from ``getNumMdls``), runs
    ``parseHelices`` and, when a sheet list is available, ``parseSheets``.
    The six COLUMN_* collections are presumably filled by those calls -
    TODO confirm.

    Returns:
        ``(TABLE, STDS)``: two lists with one entry per column, in the order
        D_ON, D_OH, A_NHO, A_HOC, BETA, GAMMA. Each entry lists the mean
        (TABLE) or standard deviation (STDS) of every element of that column,
        with 0 standing in for empty elements.
    """
    # initial setup
    print("Running through " + pfile + "...")
    numMdls = getNumMdls(pfile)
    # NOTE(review): ProDy documents this keyword as 'altloc'; confirm that
    # 'altLoc' is honoured.
    appf = pr.parsePDB(pfile, model=numMdls, secondary=True, chain='A', altLoc=False)
    los = sheets.initializeList(pfile)
    parseHelices(appf)
    if los is not None:
        parseSheets(appf, los)

    columns = [COLUMN_D_ON, COLUMN_D_OH, COLUMN_A_NHO,
               COLUMN_A_HOC, COLUMN_BETA, COLUMN_GAMMA]

    # means and standard deviations, guarding against empty samples
    TABLE = [[sum(x)/len(x) if len(x) > 0 else 0 for x in column]
             for column in columns]
    STDS = [[np.std(x) if len(x) > 0 else 0 for x in column]
            for column in columns]
    return (TABLE, STDS)
Beispiel #43
0
def test_sasa_3():
    """SASA of a single atom with an overridden radius is a sphere's area."""
    radius = 0.3
    ag = prody.parsePDB(TEST_DATA / '1atom.pdb')
    areas = calc_sasa(ag,
                      normalize=False,
                      change_radii={'N': radius},
                      probe_radius=0.001)
    expected_area = 4 * 3.14 * radius**2
    assert abs(areas[0] - expected_area) < 0.1
Beispiel #44
0
def get_voxel(input_path, buffer, width):
    """Voxelise the C/N/O/S atoms of a PDB file.

    Returns a tuple of the ``make_voxel`` result, the residue names of the
    CA atoms and the residue numbers of the CA atoms.
    """
    heavy_atoms = parsePDB(input_path).select(
        'element C or element N or element O or element S')
    occus = make_voxel(input_mol=heavy_atoms, buffer=buffer, width=width)
    resnames = heavy_atoms.select('name CA').getResnames()
    resnums = heavy_atoms.select('name CA').getResnums()
    return occus, resnames, resnums
Beispiel #45
0
def align_fasta(input_pdb_path, target_fasta_path):
    """Align a structure's CA sequence to a target FASTA with EMBOSS needle.

    Args:
        input_pdb_path: path of the PDB file.
        target_fasta_path: path of the FASTA file to align against.

    Returns:
        ``(align_pdb, input_align_indices, target_align_indices)`` - the
        selection of aligned residues and the two index arrays derived from
        the alignment - or ``(None, None, None)`` when the structure has
        fewer than 25 CA atoms.

    Two temporary files (the input FASTA and the needle output) are created
    and, as before, left on disk.
    """
    pdb = parsePDB(input_pdb_path)
    ca_atoms = pdb.select('name CA')
    # Decide before touching the file system, so the short-sequence exit no
    # longer leaves an open, half-written temporary file behind.
    if ca_atoms is None:
        return None, None, None
    sequence = ca_atoms.getSequence()
    if len(sequence) < 25:
        return None, None, None

    # NamedTemporaryFile creates the file atomically, unlike the racy mktemp.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.fasta',
                                     delete=False) as fasta_file:
        fasta_file.write('>temp\n')
        fasta_file.write(''.join(sequence))
        input_fasta_path = fasta_file.name
    with tempfile.NamedTemporaryFile(suffix='.needle',
                                     delete=False) as needle_file:
        needle_path = needle_file.name

    cmd = [
        'needle', '-outfile', needle_path, '-asequence', input_fasta_path,
        '-bsequence', target_fasta_path, '-gapopen', '10', '-gapextend',
        '0.5'
    ]
    subprocess.call(cmd)
    needle_result = list(AlignIO.parse(needle_path, 'emboss'))[0]
    input_seq, target_seq = np.array(list(str(
        needle_result[0].seq))), np.array(list(str(needle_result[1].seq)))
    # Drop the alignment columns that are gaps in the other sequence.
    input_seq, target_seq = input_seq[np.where(
        target_seq != '-')], target_seq[np.where(input_seq != '-')]
    input_align_indices = np.where(target_seq != '-')[0]
    target_align_indices = np.where(input_seq != '-')[0]
    align_pdb = pdb.select(
        'resindex ' + ' '.join(map(str, input_align_indices)))
    # The former element filter here read an unassigned name (input_mol), so
    # it always raised, and its result was never returned; it is removed.
    return align_pdb, input_align_indices, target_align_indices
Beispiel #46
0
def inline_fitness(pdb_id, verbose):
    """Print the in-line fitness of every residue of every RNA chain.

    One tab-separated line per residue is written to stdout: chain id,
    residue name, residue number, fitness. Residues for which
    ``calc_inline_fitness`` raises AttributeError are skipped.

    Args:
        pdb_id: identifier or path handed to ``parsePDB``.
        verbose: forwarded to ``calc_inline_fitness``.
    """
    structure = parsePDB(pdb_id)

    for chain in structure.iterChains():

        if not is_rna(chain):
            continue

        for residue in chain.iterResidues():

            try:
                fitness = calc_inline_fitness(residue, verbose)
            except AttributeError:
                # end of the chain
                continue

            # pos = the position *downstream* of the examined
            # internucleotide.

            chain_id = chain.getChid()
            res_num = residue.getResnum()
            res_id = residue.getResname()

            fields = (chain_id, res_id, res_num, fitness)

            # Single-argument print() behaves the same on Python 2 and 3.
            print('\t'.join(map(str, fields)))
Beispiel #47
0
def prody_biomol(opt):
    """Write biomolecule coordinates described by command line arguments.

    Reads ``opt.pdb``, ``opt.prefix`` and ``opt.biomol``; an empty prefix
    falls back to the title of the parsed structure."""

    import prody
    LOGGER = prody.LOGGER

    def write(outfn, atoms):
        # Announce and write one output file.
        LOGGER.info('Writing {0:s}'.format(outfn))
        prody.writePDB(outfn, atoms)

    prefix = opt.prefix
    biomol = opt.biomol
    pdb, header = prody.parsePDB(opt.pdb, header=True)
    if not prefix:
        prefix = pdb.getTitle()

    built = prody.buildBiomolecules(header, pdb, biomol=biomol)
    biomols = built if isinstance(built, list) else [built]

    for i, molecule in enumerate(biomols):
        if isinstance(molecule, prody.Atomic):
            write('{0:s}_biomol_{1:d}.pdb'.format(prefix, i+1), molecule)
        elif isinstance(molecule, tuple):
            # A molecule delivered in several parts: one file per part.
            for k, part in enumerate(molecule):
                write('{0:s}_biomol_{1:d}_part_{2:d}.pdb'
                      .format(prefix, i+1, k+1), part)
Beispiel #48
0
    def _generate_sidechains_scwrl(self):
        """Run SCWRL on the template and store chain B as ``self.pep``.

        With a receptor present, the receptor (moved to chain 'A') and the
        template are merged and a sequence file is passed to SCWRL; without
        one, the template alone is written and processed.
        """
        if self.rec is None:
            prody.writePDB(self._mrg_file, self._tpl)
            sequence_args = []
        else:
            receptor = self.rec.copy()
            receptor.setChids('A')
            template = self._tpl.copy()
            merged = BasePDB(ag=receptor).add_mol(
                BasePDB(ag=template), keep_resi=False, keep_chains=True)
            merged.save(self._mrg_file)

            self._make_scwrl_sequence_file()
            sequence_args = ['-s', self._seq_file]

        call = [
            define.SCWRL_EXE, '-h', '-i', self._mrg_file, '-o',
            self._scw_file
        ] + sequence_args

        # scwrl wants rosetta hydrogen naming
        BasePDB(self._mrg_file).to_rosetta().save(self._mrg_file)

        helpers.shell_call(call)

        scwrl_output = prody.parsePDB(self._scw_file)

        # extract peptide and renumber
        peptide = BasePDB(ag=scwrl_output.select('chain B').copy())
        self.pep = peptide.renumber(keep_resi=False).ag
Beispiel #49
0
def fix_openmm():
    """
    Prepare ``start.pdb`` from a crystal structure and a Rosetta model.

    Steps, all operating on fixed file names in the working directory:

    1. Copy ATOM records of ``experimental.pdb`` to ``no_smet.pdb`` and
       rewrite MSE HETATM records as MET ATOM records (the ``SE`` atom
       becomes ``SD``). All other HETATM records are dropped.
    2. Match the non-hydrogen atoms of that file against ``rosetta.pdb`` and
       write the matched crystal-structure chain to ``chain.pdb``.
    3. Run pdbfixer on ``chain.pdb``.
    4. Load ``output.pdb``, strip hydrogens, rename disulfide-bonded
       residues to CYX, write ``start.pdb`` and run tleap.

    Raises:
        subprocess.CalledProcessError: if the pdbfixer command fails.
    """
    # get the whole crystal structure
    # get only the ATOM records
    # and HETATM records for MSE
    # convert MSE to MET
    with open('no_smet.pdb', 'w') as outfile, \
            open('experimental.pdb') as infile:
        for line in infile:
            if line.startswith('ATOM'):
                outfile.write(line)
            elif line.startswith('HETATM'):
                if line[17:20] == 'MSE':
                    # This slice spans the atom name plus the following column.
                    atom_name = line[12:17]
                    if atom_name == 'SE   ':
                        atom_name = ' SD  '
                    line_fixed = ('ATOM  ' + line[6:12] + atom_name + 'MET' +
                                  line[20:67] + '\n')
                    outfile.write(line_fixed)

    # load the file into prody
    p = prody.parsePDB('no_smet.pdb')
    p = p.select('not hydrogen')

    # get one of the rosetta models
    r = prody.parsePDB('rosetta.pdb')

    # perform an alignment to find out what part of the crystal structure
    # corresponds to the rosetta file
    match = prody.matchChains(r, p, subset='all', overlap=25, pwalign=True)[0][1]
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print(len(match))
    prody.writePDB('chain.pdb', match)

    # now clean it up with pdb fixer
    # shell=True is needed for '~' expansion; the command is a fixed string.
    subprocess.check_call('python ~/Source/PdbFixer/pdbfixer.py chain.pdb', shell=True)

    # now load it with zam
    p = protein.Protein('output.pdb')
    p.Dehydrogen()
    disulfide_pairs = find_disulfide(p)
    for r1, r2 in disulfide_pairs:
        print('    added disulfide between {} and {}'.format(r1, r2))
        p.Res[r1].FullName = 'CYX'
        p.Res[r2].FullName = 'CYX'
    p.WritePdb('start.pdb')

    # now run tleap
    print('    running tleap')
    run_tleap(disulfide_pairs)
Beispiel #50
0
def prody_catdcd(opt):
    """Concatenate DCD files.

    *opt* is an options object. The attributes read are:

    * ``num`` - when true, only print the frame count of each file in
      ``dcd`` followed by the total, then return.
    * ``dcd`` - list of input DCD file names (left unmodified).
    * ``psf`` / ``pdb`` - optional structure file; parsed as PSF when the
      extension is ``.psf``, otherwise as PDB.
    * ``select`` - selection string for the atoms written (used only when a
      structure file is given).
    * ``align`` - optional selection string; frames are superposed on it
      before writing. Requires ``psf`` or ``pdb``.
    * ``first``, ``last``, ``stride`` - slice of frame indices to write.
    * ``output`` - name of the DCD file to write.

    Raises:
        ValueError: if ``align`` is set but neither ``psf`` nor ``pdb`` is.
    """

    import prody
    LOGGER = prody.LOGGER
    if opt.num:
        num = []
        for fn in opt.dcd:
            dcd = prody.DCDFile(fn)
            try:
                num.append(dcd.numFrames())
            finally:
                # Do not leave one open handle per input file behind.
                dcd.close()
        for n in num:
            print(n)
        print(sum(num))
        return
    align = opt.align
    ag = opt.psf or opt.pdb
    if ag:
        if os.path.splitext(ag)[1].lower() == '.psf':
            ag = prody.parsePSF(ag)
        else:
            ag = prody.parsePDB(ag)
    elif align:
        raise ValueError('one of PSF or PDB files must be provided for '
                         'align option to work')

    # Work on a copy so the caller's option list is not consumed.
    filenames = list(opt.dcd)
    traj = prody.Trajectory(filenames[0])
    for fn in filenames[1:]:
        traj.addFile(fn)
    if ag:
        traj.setAtoms(ag)
        select = traj.select(opt.select)
        LOGGER.info('{0:d} atoms are selected for writing output.'
                    .format(len(select)))
        if align:
            _ = traj.select(align)
            LOGGER.info('{0:d} atoms are selected for aligning frames.'
                        .format(len(_)))

    out = prody.DCDFile(opt.output, 'w')
    count = 0
    try:
        # NOTE(review): the +1 presumably lets a default ``last`` of -1
        # include the final frame -- confirm against the option defaults.
        slc = slice(opt.first, opt.last, opt.stride).indices(len(traj)+1)
        # Index of the frame the next sequential read would return; assumes
        # a freshly opened trajectory is positioned at frame 0.
        position = 0
        for i in range(*slc):
            # Seek whenever the requested frame is not the next sequential
            # one. This covers strides other than 1 and also a non-zero
            # ``first``, which was previously ignored when stride was 1.
            if i != position:
                traj.goto(i)
            frame = traj.next()
            position = i + 1
            if align:
                frame.superpose()
                out.write(select._getCoords(), frame.getUnitcell())
            else:
                out.write(frame._getCoords(), frame.getUnitcell())
            count += 1
    finally:
        # Release both files even if reading or writing a frame fails.
        traj.close()
        out.close()
    LOGGER.info("{0:d} frames are written into '{1:s}'."
                .format(count, opt.output))
def combine_structures(directory_with_pdbs, output_filename):
    """
    Merge every ``*.pdb`` file of a directory into one multi-model PDB file.

    The first file found supplies the atoms; each further file is parsed and
    added to it as an additional coordinate set. Files are processed in the
    order returned by ``glob``.

    :param directory_with_pdbs: directory searched for ``*.pdb`` files
    :param output_filename: path of the combined PDB file to write
    :raises RuntimeError: if more than ``MAX_FRAMES`` files are found
    :raises IndexError: if no ``*.pdb`` file is found
    """
    search_string = join(directory_with_pdbs, '*.pdb')
    path_list = glob(search_string)

    if len(path_list) > MAX_FRAMES:
        # The placeholders were previously never filled in.
        raise RuntimeError(
            'Got %d frames, but only up to %d frames are allowed.'
            % (len(path_list), MAX_FRAMES))

    atom_group = parsePDB(path_list[0])
    for path in path_list[1:]:
        atom_group.addCoordset(parsePDB(path))
    writePDB(output_filename, atom_group)
Beispiel #52
0
    def load(self,filename):
        """
        Parse model 1 of the PDB file ``filename`` into ``self.model``.

        The file name is remembered in ``self.filname``. When ``self.center``
        is truthy the atoms are moved to the origin. ``self.ca_model`` is set
        to the protein C-alpha selection of the loaded model.
        """
        self.filname = filename
        structure = prody.parsePDB(self.filname, model=1)
        self.model = structure
        print ("self.center",self.center)
        if self.center :
            origin = numpy.zeros(3)
            moveAtoms(structure, to=origin)
        # Only protein CA atoms are selected here; nucleic acids are not.
        self.ca_model = structure.select('protein and name CA')
Beispiel #53
0
def prody_align(opt):
    """Align models in a PDB file or a PDB file onto others.

    With exactly one entry in ``opt.pdb`` the models of that file are
    superposed using the selection ``opt.select`` and ``opt.model`` as the
    reference model. The result is written to ``<prefix>.pdb``, where an
    empty ``opt.prefix`` defaults to ``<title>_aligned``. The process exits
    with status -1 if the selection matches no atoms.

    Otherwise the first entry of ``opt.pdb`` is removed from the list and
    used as the reference. Every other file is aligned onto it and written
    to ``<title>_aligned.pdb``. Entries equal to the reference name or
    containing ``_aligned.pdb`` are skipped.
    """

    import prody
    log = prody.LOGGER

    paths = opt.pdb
    if len(paths) != 1:
        # Several files: align each one onto the first.
        reference_path = paths.pop(0)
        log.info('Aligning structures onto: ' + reference_path)
        reference = prody.parsePDB(reference_path)
        for path in paths:
            if path == reference_path or '_aligned.pdb' in path:
                continue
            mobile = prody.parsePDB(path)
            if not prody.matchAlign(mobile, reference):
                log.warning('Failed to align ' + path)
                continue
            out_path = mobile.getTitle() + '_aligned.pdb'
            log.info('Writing file: ' + out_path)
            prody.writePDB(out_path, mobile)
        return

    # Single file: align its models onto the chosen reference model.
    path = paths[0]
    log.info('Aligning multiple models in: ' + path)
    selstr = opt.select
    prefix = opt.prefix
    model = opt.model
    structure = prody.parsePDB(path)
    selection = structure.select(selstr)
    if selection is None:
        log.warning('Selection "{0:s}" do not match any atoms.'
                    .format(selstr))
        sys.exit(-1)
    log.info('{0:d} atoms will be used for alignment.'
             .format(len(selection)))
    # Model numbers are 1-based, coordinate set indices 0-based.
    structure.setACSIndex(model-1)
    prody.alignCoordsets(structure, selstr=selstr)
    rmsd = prody.calcRMSD(structure)
    log.info('Max RMSD: {0:0.2f} Mean RMSD: {1:0.2f}'
             .format(rmsd.max(), rmsd.mean()))
    if prefix == '':
        prefix = structure.getTitle() + '_aligned'
    out_path = prefix + '.pdb'
    log.info('Writing file: ' + out_path)
    prody.writePDB(out_path, structure)
    def test_calc_matrix(self):
        """
        Check the matrix built from "data/3_models.pdb" against reference
        values, compared to 8 decimal places.
        """
        expected = [
            35.01002624, 47.60315215, 88.64981522,
            32.90471145, 87.13023459, 85.76106107,
        ]
        structure = prody.parsePDB("data/3_models.pdb")
        matrix = DihedralRMSDMatrixCalculator.build(structure)
        numpy.testing.assert_almost_equal(expected, matrix.get_data(),
                                          decimal=8)
Beispiel #55
0
 def conservation(self):
     """
     Return the conservation scores of the first chain as a DataFrame.

     The file ``pdbFILE_view_ConSurf.pdb`` is read from the directory
     ``<CONSURF_DATA_PATH>/<pdbid>``. The beta (B-factor) values of the
     first chain's ``ca`` selection are truncated to the length of
     ``self.resnum_index`` and returned in a single column named
     'Conservation-score', indexed by ``self.resnum_index``.
     """
     consurf_pdb_filename = os.path.join(self.CONSURF_DATA_PATH, self.pdbid, 'pdbFILE_view_ConSurf.pdb')
     p = prody.parsePDB(consurf_pdb_filename)
     # Builtin next() works on Python 2.6+ and 3; the iterator's .next()
     # method exists on Python 2 only.
     consurf_chain = next(p.getHierView().iterChains())
     scores = consurf_chain.ca.getBetas()[:len(self.resnum_index)]
     return pd.DataFrame(
                         columns=['Conservation-score'],
                         index=self.resnum_index,
                         data=scores,
                         )
def preprocess_pdb(args):
    """
    Extract the selected frames of a multi-model PDB file.

    ``args[1]`` is the PDB file and ``args[2]`` names both the working
    directory ("./<name>") and the output prefix ("./<name>/<name>"). The
    frame indices come from ``get_frame_numbers(args)``.

    The selected frames are written to "<output>_ini.pdb" and their CA
    coordinates are passed to ``print_matrix``.

    Returns a tuple ``(pdb, input_coordsets, cluster_frames, output)``:
    the parsed structure with all coordinate sets removed, the array of
    selected coordinate sets, the frame indices and the output prefix.
    """
    pdb_file = args[1]
    work_dir = "./" + args[2]
    output = work_dir + "/" + args[2]
    create_directory(work_dir)
    cluster_frames = get_frame_numbers(args)

    # Copy the chosen coordinate sets before the structure is emptied.
    pdb = prody.parsePDB(pdb_file)
    input_coordsets = numpy.array(pdb.getCoordsets()[cluster_frames])
    pdb.delCoordset(range(pdb.numCoordsets()))

    # A second, emptied copy of the structure receives only the chosen frames.
    input_pdb = prody.parsePDB(pdb_file)
    input_pdb.delCoordset(range(input_pdb.numCoordsets()))
    for coordset in input_coordsets:
        input_pdb.addCoordset(coordset)

    prody.writePDB(output+"_ini.pdb", input_pdb)
    ca_coordsets = input_pdb.select("name CA").getCoordsets()
    print_matrix(ca_coordsets, output)
    return pdb, input_coordsets, cluster_frames, output
Beispiel #57
0
    def _from_file(path):
        """
        Parse a PDB file with ProDy.

        Parameters:
            path - filepath of the PDB file to read

        Returns:
            Whatever ``prody.parsePDB`` returns for ``path``
        """
        atom_group = prody.parsePDB(path)
        return atom_group
Beispiel #58
0
def read_naccess_asa(asa_filename):
    '''
    Load per-atom ASA values of the protein atoms in ``asa_filename``.

    The asa file is a PDB file with ASA values as occupancy and VDW radii
    as B-factor, so the occupancies are returned as a pandas.Series named
    'per_atom_asa'.
    '''
    protein_atoms = prody.parsePDB(asa_filename).protein
    return pd.Series(data=protein_atoms.getOccupancies(), name='per_atom_asa')
Beispiel #59
0
    def setUp(self):
        """
        Set up the output path, input data files and base command line,
        then call ``tearDown``.
        """
        output_path = join(TEMPDIR, 'test_prody_catdcd.dcd')
        self.output = output_path

        dcd_path = pathDatafile('dcd')
        pdb_path = pathDatafile('multi_model_truncated')
        self.dcdpath = dcd_path
        self.pdbpath = pdb_path

        self.dcd = DCDFile(dcd_path)
        self.ag = parsePDB(pdb_path, model=1)

        self.command = 'catdcd -o ' + output_path

        self.tearDown()
Beispiel #60
0
	def _find_ligand(self):
		"""Write the non-protein, non-water atoms of the structure to a PDB file.

		The structure at ``self.file_path`` is parsed. If the sequence of
		chain 'A' can be read, the selection 'not protein and not water' is
		written to ``<stem>_ligand.pdb`` and that name is stored in
		``self.out_filename``. If reading the sequence raises, nothing is
		written.
		"""
		# presumably sets self.file_path for the ligand case -- verify in _get_file_path
		self._get_file_path(ligand=True)
		protein = parsePDB(self.file_path)
		try:
			# Only used as a probe: the value of seq is not read below.
			seq = protein['A'].getSequence()
		except:
			# NOTE(review): bare except silently swallows every error,
			# including KeyboardInterrupt -- consider narrowing it.
			pass
		else:
			ligand = protein.select('not protein and not water')
			# NOTE(review): the result of repr() is discarded.
			repr(ligand)
			if ligand:
				# The stem is everything before the FIRST '.', so any dot
				# earlier in the path (e.g. a leading './') cuts it short.
				self.out_filename = self.file_path.split('.')[0] + '_ligand.pdb'
				writePDB(self.out_filename, ligand)