Ejemplo n.º 1
0
 def analyze_dihedral(self):
     """
     Deprecated. Please use class Dihedral_Analisys

     Compute the dihedral angles (in degrees) around the covalent bond and
     write them to the file ``<self.path>/<DIHEDRAL_OUTPUT>``.

     The first angle is CA - CB - receptor covalent atom - ligand covalent
     atom. It is followed by one angle CB - receptor covalent atom - ligand
     covalent atom - X for every ligand atom X returned by a neighbor search
     of radius 2 around the ligand covalent atom (the covalent atom itself
     is excluded by name).

     The output is a single line in which every angle is preceded by one
     space, e.g. `` 12.3 -45.6``.
     """
     cov_atm_name = self.ligand_dict['cov_atm']
     cov_atm_lig = self.ligand.child_dict[cov_atm_name]

     # Vectors shared by several dihedrals are computed once.
     cb_vector = self.covalent_res.child_dict['CB'].get_vector()
     res_atm_vector = self.covalent_atm_res.get_vector()
     lig_atm_vector = cov_atm_lig.get_vector()

     # Dihedral between CA < CB < SG < ligand
     angles = [
         math.degrees(
             bp.calc_dihedral(
                 self.covalent_res.child_dict['CA'].get_vector(),
                 cb_vector, res_atm_vector, lig_atm_vector))
     ]

     # All dihedrals of CB < SG < ligand-covalent-atom < other ligand atoms
     ns = bp.NeighborSearch(list(self.ligand.get_atom()))
     for neighbor in ns.search(cov_atm_lig.get_coord(), 2):
         if neighbor.name == cov_atm_name:
             continue  # skip the covalent atom itself
         angles.append(
             math.degrees(
                 bp.calc_dihedral(cb_vector, res_atm_vector, lig_atm_vector,
                                  neighbor.get_vector())))

     # A context manager guarantees the handle is flushed and closed; the
     # text layout (leading space before every value) is kept unchanged.
     with open('/'.join([self.path, DIHEDRAL_OUTPUT]), 'w') as handle:
         handle.write(''.join(' ' + str(ang) for ang in angles))
Ejemplo n.º 2
0
def annotate_fallback(chain_list):
    """
    If neither DSSR nor MC-Annotate are available, we use an ad-hoc implementation of canonical
    basepair detection as fallback.
    This does not work well for missing atoms or modified residues.

    Residue pairs found by a neighbor search (radius 10, residue level) are
    tested with ``is_basepair_pair``. Each residue is assigned at most one
    partner: the first accepted pair in sorted order wins and later
    conflicting pairs are reported with a warning and ignored.

    :param chain_list: A re-iterable collection of Bio.PDB chains (it is
                       traversed several times).
    :return: A tuple ``(bpseq, seq_ids)``. ``bpseq`` is a string with one
             line ``"<index> <resname> <partner index or 0>"`` per residue
             (1-based indices, chains ordered by id); ``seq_ids`` is the
             list of residue ids in the same order.
    """
    kdtree = bpdb.NeighborSearch(
        [atom for chain in chain_list for atom in chain.get_atoms()])
    pairs = kdtree.search_all(10, "R")
    basepairs = {}
    # Sorted, so conflicting basepairs are deterministically solved
    for res1, res2 in sorted(pairs):
        if res1.resname.strip() not in RNA_RESIDUES or res1.id[0].startswith(
                "H_"):
            continue
        if res2.resname.strip() not in RNA_RESIDUES or res2.id[0].startswith(
                "H_"):
            continue
        try:
            if not is_basepair_pair(res1, res2):
                continue
            res1_id = fgr.resid_from_biopython(res1)
            res2_id = fgr.resid_from_biopython(res2)
            # Check both residues (res1 first) for an existing partner.
            for taken, partner in ((res1_id, res2_id), (res2_id, res1_id)):
                if taken in basepairs:
                    warnings.warn("More than one basepair detected for {}."
                                  " Ignoring {}-{} because {}-{} is already"
                                  " part of the structure".format(
                                      taken, taken, partner, taken,
                                      basepairs[taken]))
                    break
            else:
                basepairs[res1_id] = res2_id
                basepairs[res2_id] = res1_id
        except KeyError as e:
            # A missing atom makes the geometric test impossible; skip pair.
            log.debug("Missing atom %s. %s has atoms %s, %s has atoms %s", e,
                      res1, res1.child_dict, res2, res2.child_dict)

    seq_ids = [
        fgr.resid_from_biopython(residue)
        for chain in sorted(chain_list, key=lambda x: x.id)
        for residue in chain
    ]
    # 1-based position of the first occurrence of every residue id. This
    # replaces a linear ``seq_ids.index`` call per residue.
    first_position = {}
    for position, seqid in enumerate(seq_ids, 1):
        first_position.setdefault(seqid, position)

    chain_dict = {c.id: c for c in chain_list}
    lines = []
    for i, seqid in enumerate(seq_ids, 1):
        if seqid in basepairs:
            bp = first_position[basepairs[seqid]]
        else:
            bp = 0
        lines.append("{} {} {}\n".format(
            i, chain_dict[seqid.chain][seqid.resid].resname.strip(), bp))
    return "".join(lines), seq_ids
Ejemplo n.º 3
0
 def get_atom_neighbors(self, atom, atoms, cov_dist=2):
     """
     Return the atoms of ``atoms`` found within ``cov_dist`` of ``atom``.

     Atoms whose name starts with 'H' and atoms sharing the name of
     ``atom`` are left out of the result.
     """
     search_tree = bp.NeighborSearch(atoms)
     nearby = search_tree.search(atom.get_coord(), cov_dist)
     kept = []
     for candidate in nearby:
         # drop hydrogens and the query atom itself (matched by name)
         if candidate.name.startswith('H'):
             continue
         if candidate.name == atom.name:
             continue
         kept.append(candidate)
     return kept
Ejemplo n.º 4
0
def is_contact(res_1, other_atoms, cutoff):
    """
    Tell whether any atom of ``res_1`` lies within ``cutoff`` of ``other_atoms``.

    :param res_1: Iterable of atoms (e.g. a Bio.PDB residue).
    :param other_atoms: List of atoms to search in.
    :param cutoff: Search radius passed to ``NeighborSearch.search``.
    :return: True as soon as one atom of ``res_1`` has a neighbor,
             otherwise False (also for an empty ``res_1``).
    """
    atoms = list(res_1)
    if not atoms:
        # Nothing to test; do not build a search tree at all.
        return False

    # The search tree only depends on other_atoms, so build it once
    # instead of once per atom of res_1.
    ns = PDB.NeighborSearch(other_atoms)
    for atom in atoms:
        neighbors = ns.search(atom.get_coord(), cutoff)
        residue_list = PDB.Selection.unfold_entities(neighbors,
                                                     'R')  # R for residues
        if len(residue_list) > 0:
            return True
    return False
Ejemplo n.º 5
0
def interchain_contacts(struct):
    """
    Return the atom pairs closer than 2.8 that belong to different chains.

    :param struct: Entity whose atoms are collected with
                   ``Selection.unfold_entities``.
    :return: A list of ``(atom, atom)`` tuples whose grandparents (the
             parents of the atoms' parents) compare unequal.
    """
    atoms = bpdb.Selection.unfold_entities(struct, 'A')
    close_pairs = bpdb.NeighborSearch(atoms).search_all(2.8)
    return [(first, second) for (first, second) in close_pairs
            if first.parent.parent != second.parent.parent]
Ejemplo n.º 6
0
def noncovalent_distances(chain, cutoff=0.3):
    '''
    Return the distances between all non-covalently bonded atoms
    which are closer than cutoff to each other.

    :param chain: The Bio.PDB chain.
    :param cutoff: The maximum distance
    :return: A list with one value per atom pair that ``is_covalent``
             rejects, computed as ``ftuv.magnitude(pair[1] - pair[0])``.
    '''
    atoms = bpdb.Selection.unfold_entities(chain, 'A')
    tree = bpdb.NeighborSearch(atoms)

    distances = []
    for pair in tree.search_all(cutoff):
        if is_covalent(pair):
            continue
        distances.append(ftuv.magnitude(pair[1] - pair[0]))
    return distances
Ejemplo n.º 7
0
def num_noncovalent_clashes(chain):
    '''
    Count the non-covalent clashes of a chain. A clash is a pair of atoms
    returned by a neighbor search with radius 1.9 for which ``is_covalent``
    is false.

    :param chain: The chain to evaluate
    :return: The number of non-covalent clashes.
    '''
    atoms = bpdb.Selection.unfold_entities(chain, 'A')
    close_pairs = bpdb.NeighborSearch(atoms).search_all(1.9)

    return sum(1 for pair in close_pairs if not is_covalent(pair))
def neighbourhood(atoms, radius, show):
    """
    Collect the residue contacts between different chains.

    Every atom pair closer than ``radius`` whose chains have different ids
    is reported through ``printverbose`` and recorded in both directions.

    :param atoms: List of atoms to search.
    :param radius: Search radius for ``NeighborSearch.search_all``.
    :param show: Flag forwarded to ``printverbose``.
    :return: A set of tuples ``(residue, chain id, residue, chain id)``.
    """
    unfold = PDB.Selection.unfold_entities
    contacts = set()
    for atom_a, atom_b in PDB.NeighborSearch(atoms).search_all(radius, 'A'):
        chain_a = unfold([atom_a], 'C')[0]
        chain_b = unfold([atom_b], 'C')[0]
        res_a = unfold([atom_a], 'R')[0]
        res_b = unfold([atom_b], 'R')[0]
        if chain_a.get_id() == chain_b.get_id():
            continue  # same chain: not an interface contact
        message = (" %s %s (Atom %s) (chain %s)  contact with %s %s (Atom %s) (chain %s) \n" %
                   (res_a.get_resname(), res_a.get_id()[1], atom_a.get_id(), chain_a.get_id(),
                    res_b.get_resname(), res_b.get_id()[1], atom_b.get_id(), chain_b.get_id()))
        printverbose(sys.stdout, show, message)
        contacts.add((res_a, chain_a.get_id(), res_b, chain_b.get_id()))
        contacts.add((res_b, chain_b.get_id(), res_a, chain_a.get_id()))
    return contacts
Ejemplo n.º 9
0
 def _find_covalent_atom(self):
     """
     Locate the receptor/ligand atom pair that forms the covalent bond.

     All ligand atoms and all atoms of residues whose id starts with ' '
     are searched for pairs closer than ``self.covalent_distance``. Pairs
     that do not join a ' ' residue with the ligand, or that involve an
     atom whose name starts with 'H', are discarded. The closest remaining
     pair is stored in ``self.receptor_cov_atom`` and
     ``self.ligand_cov_atom``.

     :raises ValueError: If no candidate pair is found.
     """
     ligand_id = self.ligand.id

     atoms = list(self.ligand.get_atom())  # ligand atoms
     for res in self.structure.get_residues():
         if res.id[0] == ' ':  # add all residues atoms
             # extend() avoids copying the whole list for every residue
             atoms.extend(res.get_list())
     ns = bp.NeighborSearch(atoms)
     neighbors = ns.search_all(self.covalent_distance)

     covalent = []
     for cpl in neighbors:
         first, second = cpl[0], cpl[1]
         # keep only intermolecule pairs (receptor residue <-> ligand)
         intermolecular = (
             (first.parent.id[0] == ' ' and second.parent.id == ligand_id) or
             (first.parent.id == ligand_id and second.parent.id[0] == ' '))
         if not intermolecular:
             continue
         # filter Hydrogens
         if first.name.startswith('H') or second.name.startswith('H'):
             continue
         covalent.append(cpl)

     if not covalent:
         # Same exception type min() would raise, with a usable message.
         raise ValueError(
             "No receptor-ligand heavy atom pair found within %s"
             % (self.covalent_distance,))
     cov_cpl = min(covalent, key=lambda cpl: abs(cpl[0] - cpl[1]))

     ## decouple ligand & receptor covalent atoms
     self.receptor_cov_atom = [atom for atom in cov_cpl if atom.parent.id[0] == ' '][0]
     self.ligand_cov_atom = [atom for atom in cov_cpl if atom.parent.id == ligand_id][0]
Ejemplo n.º 10
0
def enumerate_interactions_kdtree(model):
    """
    Return the RNA residues that are close to a non-RNA residue.

    Only atoms whose name starts with C, N or O are searched (radius 6,
    atom level). A pair of atoms is considered when at least one of the two
    atom names is in ``all_side_chains`` and the atoms belong to residues
    with different ids. From each resulting residue pair made of exactly
    one RNA residue and one other residue, the RNA residue is reported if
    the other residue contains at least one atom whose name starts with
    C or N.

    :param model: Object providing ``get_atoms()``.
    :return: A set of residues (empty if there are no relevant atoms).
    """
    relevant_atoms = []
    for a in model.get_atoms():
        if a.name[0] in ("C", "N", "O"):
            relevant_atoms.append(a)
    if not relevant_atoms:
        return set()

    kdtree = bpdb.NeighborSearch(relevant_atoms)
    res_pair_list = set()
    for a1, a2 in kdtree.search_all(6, "A"):
        if not (a1.name in all_side_chains or a2.name in all_side_chains):
            continue
        p1 = a1.get_parent()
        p2 = a2.get_parent()
        if p1.id == p2.id:
            continue
        # store every pair in a canonical (smaller, larger) order
        ordered = (p1, p2) if p1 < p2 else (p2, p1)
        res_pair_list.add(ordered)

    interacting_residues = set()
    for res1, res2 in res_pair_list:
        first_is_rna = (res1.resname.strip() in RNA_RESIDUES
                        and not res1.id[0].startswith("H_"))
        second_is_rna = (res2.resname.strip() in RNA_RESIDUES
                         and not res2.id[0].startswith("H_"))
        # exactly one partner has to be RNA
        if first_is_rna == second_is_rna:
            continue
        if first_is_rna:
            rna_res, other_res = res1, res2
        else:
            rna_res, other_res = res2, res1
        log.debug("%s(chain %s) and %s(chain %s, resname %s) are close",
                  rna_res, rna_res.parent.id, other_res, other_res.parent.id,
                  other_res.resname.strip())
        # Only consider C and N. So no ions etc
        has_c_or_n = any(
            a.name[0] in ("C", "N") for a in other_res.get_atoms())
        if not has_c_or_n:
            log.debug("but %s has wrong atoms %s", other_res,
                      [a.name for a in other_res.get_atoms()])
            continue
        interacting_residues.add(rna_res)
    log.debug("Interacting: %s", interacting_residues)
    return interacting_residues
Ejemplo n.º 11
0
def are_clashing(chain_one, chain_two, max_clashes=300, contact_distance=1.0):
    """
    Check the CA atoms of two chains for clashes.

    Every CA atom of ``chain_one`` found within ``contact_distance`` of a
    CA atom of ``chain_two`` counts as one clash. Returns True at the
    moment the running count equals ``max_clashes`` (writing a note to
    stderr when ``main.options.verbose`` is set), otherwise False.
    """
    def alpha_carbons(chain):
        return [a for a in chain.get_atoms() if a.get_id() == 'CA']

    reference_atoms = alpha_carbons(chain_one)
    probe_atoms = alpha_carbons(chain_two)
    tree = pdb.NeighborSearch(reference_atoms)

    found = 0
    for probe in probe_atoms:
        hits = tree.search(probe.get_coord(), contact_distance, 'A')
        for _ in hits:
            found += 1
            if found != max_clashes:
                continue
            if main.options.verbose:
                sys.stderr.write("Clash Found!\n")
            return True
    return False
Ejemplo n.º 12
0
def has_clashes(move_atoms, model):
    """Compare the backbone atoms of the moving chain with the backbone atoms of the model.

    Backbone atoms are those with id "CA" or "C1'". A moving atom clashes
    when the model has at least one backbone atom within a radius of 2.

    Arguments:
    move_atoms -- iterable of atoms of the chain being placed
    model -- object providing get_atoms() for the current model

    Returns True if at least 3% of the moving backbone atoms clash,
    otherwise False. A chain without backbone atoms cannot clash, so
    False is returned (previously this raised ZeroDivisionError).
    """
    backbone = {"CA", "C1'"}
    chain_atoms = [atom for atom in move_atoms
                   if atom.id in backbone]  # Gets only the backbone atoms
    if not chain_atoms:
        return False
    model_atoms = [atom for atom in model.get_atoms() if atom.id in backbone]
    # Neighbor search tree to speed up the distance calculations
    ns = PDB.NeighborSearch(model_atoms)
    clashes = sum(1 for atom in chain_atoms if ns.search(atom.coord, 2))
    # float() keeps the ratio a true division on Python 2 as well
    return clashes / float(len(chain_atoms)) >= 0.03
Ejemplo n.º 13
0
def clashing_filter(chain_one, chain_two, max_clashes=30, contact_distance=2.0):
    """
    Look for clashes between the CA / C5' atoms of two chains.

    Each CA or C5' atom of chain_one lying within contact_distance of a
    CA or C5' atom of chain_two counts as one clash.

    Returns true if the maximum number of clashes is reached, else returns false.
    """
    selected_ids = ('CA', "C5'")
    reference_atoms = [a for a in chain_one.get_atoms() if a.get_id() in selected_ids]
    probe_atoms = [a for a in chain_two.get_atoms() if a.get_id() in selected_ids]
    tree = pdb.NeighborSearch(reference_atoms)

    total = 0
    for probe in probe_atoms:
        for _ in tree.search(probe.get_coord(), contact_distance, 'A'):
            total += 1
            if total != max_clashes:
                continue
            if main.options.verbose:
                sys.stderr.write("Clash Found!\n")
            return True

    return False
Ejemplo n.º 14
0
def has_clashes(move_atoms, model, clash_dist=2):
    """Compare the backbone atoms of the moving chain with the backbone atoms of the model.

    Returns a boolean value based on the presence and abundance of clashes.

    Keyword arguments:
    move_atoms -- set of atoms from the chain that is potentially going to be added to the macrocomplex
    model -- current macrocomplex model in construction
    clash_dist -- minimum clash distance between 2 atoms (converted with float()). The default is 2 A

    Considerations:
    Atoms with id "CA" or "C1'" are used as backbone atoms.
    Threshold: if at least 3% of the backbone atoms of the moving chain show a clash, True is
    returned. Hence, that chain won't be added to the model.
    A chain without backbone atoms cannot clash, so False is returned
    (previously this raised ZeroDivisionError)."""
    backbone = {"CA", "C1'"}
    chain_atoms = [atom for atom in move_atoms if atom.id in backbone]
    if not chain_atoms:
        return False
    radius = float(clash_dist)  # converted once instead of once per atom
    model_atoms = [atom for atom in model.get_atoms() if atom.id in backbone]
    ns = PDB.NeighborSearch(model_atoms)
    clashes = sum(1 for atom in chain_atoms if ns.search(atom.coord, radius))
    # float() keeps the ratio a true division on Python 2 as well
    return clashes / float(len(chain_atoms)) >= 0.03
Ejemplo n.º 15
0
def HydrogenBondFilter(receptor_atoms, molecule, affinity):
	"""Check every pose of a ligand file for hydrogen bonds with the targets.

	For each atom in the module-level ``targets`` and each pose (model) of
	the ligand structure, the ligand atoms within 3.6 of the target whose
	name (digits removed) is in ``Filter_atoms`` are handed to
	``HBondCheck``. The per-pose results of all targets are merged: a pose
	keeps the first non-zero result obtained for it.

	Side effects: sets the globals ``use_angle`` and ``targethydrogen``
	before the poses of each target are evaluated (presumably read by
	``HBondCheck`` -- confirm). ``targethydrogen`` is only updated when the
	target has a hydrogen within 1.1.

	receptor_atoms -- list of receptor atoms
	molecule -- path of the ligand PDB file
	affinity -- value forwarded unchanged to HBondCheck

	Returns a list with one entry per pose (as returned by HBondCheck;
	0.0 marks a failed pose according to the merge logic below). The list
	is empty when there are no targets.
	"""
	global targethydrogen, use_angle
	digits = "0123456789"
	ligand_id = os.path.basename(molecule).replace(".pdb", "")
	ligand = parser.get_structure(ligand_id, molecule)
	checklist = []
	### Loop through list of all atoms we know contribute to hydrogen bonds from the protein. At least one is essential for function so poses only pass if they form at least one with the defined targets
	for target in targets:
		protein_hydrogen = [atom for atom in bp.Selection.unfold_entities(target.get_parent(), "A") if ("H" in atom.get_id()) and target - atom <= 1.1]
		if protein_hydrogen:
			use_angle = True
			targethydrogen = protein_hydrogen[0]
		else:
			use_angle = False
		PosePassInfo = []
		### Grab the hydrogen bond info of each pose of each drug. Unfold the atoms of the drug and combine them with the atoms of the receptor. Then a search is performed on close atoms
		for posenum, pose in enumerate(ligand):
			pose_atoms = bp.Selection.unfold_entities(pose, "A")
			all_atoms = receptor_atoms + pose_atoms
			ns = bp.NeighborSearch(all_atoms)
			### Search for atoms that are within X angstrom of defined target. Then check whether found atoms are ligand atoms and if they are capable of hydrogen bond formation
			close_atoms = ns.search(target.coord, 3.6)
			Hbond_LigAtoms = []
			for atom in close_atoms:
				if atom not in pose_atoms:
					continue
				# Strip digits without str.translate(None, ...), which only
				# exists on Python 2 byte strings and raises TypeError on Python 3.
				element_name = "".join(ch for ch in atom.get_name() if ch not in digits)
				if element_name in Filter_atoms:
					Hbond_LigAtoms.append(atom)
			### Check if H-bond is within defined parameters per pose and put that in a list per drug. Poses that pass have their ligand efficiency written, poses that don't have 0.0
			Pass = HBondCheck(Hbond_LigAtoms, pose_atoms, posenum, affinity, target)
			PosePassInfo.append(Pass)
		### In case of multiple possible hydrogen bonds a list is kept of poses that have passed at least one of them
		if checklist:
			for posenum, entry in enumerate(PosePassInfo):
				if (entry != 0.0) and (checklist[posenum] == 0.0):
					checklist[posenum] = entry
		else:
			checklist = PosePassInfo

	return checklist
Ejemplo n.º 16
0
def process_queue(id):
    """
    Describe the atoms surrounding one target atom as a comma-separated string.

    Row ``id`` of the module-level ``dfHag`` table supplies the PDB entry
    (``pdb``), chain, residue (``resName``/``resNum``), atom name and a
    ``coord`` label. The PDB file is parsed, the matching atom is looked up
    in the chain, and the atoms found within 2.8 of it in model 0 are passed
    to ``get_environment``.

    The resulting string is put on ``process_queue.q`` and returned. It is
    the empty string when no atom matched; when several atoms match, the
    last match wins.

    :param id: Positional row index into ``dfHag`` (used with ``.iloc``).
    :return: The environment string described above.
    """
    # NOTE(review): hard-coded, machine-specific data directory.
    pdir = "/home/kenneth/proj/proMin/proteins/hagai/2018/pdbs"
    df = dfHag.iloc[id]
    # print(df)
    pdbFile = os.path.join(pdir, df.pdb + ".pdb")
    #Create a PDBParser object
    parser = PDBParser()  #PERMISSIVE = 0 will list all errors with PDB file
    STR = parser.get_structure(df.pdb, os.path.join(pdbFile))
    envComp = ""

    atoms = STR[0][df.chain].get_atoms()
    atomID = ""

    # The loop iterates over the iterator created here; rebinding ``atoms``
    # (and ``atom``) inside the body does not change what is iterated.
    for atom in atoms:
        # print(atom.get_fullname())
        # print(atom.get_fullname().replace(' ',''))
        if atom.get_name() == df.atomName:
            resID = atom.get_parent().get_full_id()
            # print(resID,'blah')
            # resID[3] is the residue id; its first field must contain
            # df.resName and its second field must equal df.resNum.
            if (resID[3][0].find(df.resName) > -1) and (int(resID[3][1])
                                                        == df.resNum):
                atomID = resID[3]
                # NOTE(review): atomID was just set to resID[3], which is
                # indexed like a sequence above; this comparison with ''
                # looks like it can never be true -- confirm.
                if atomID == '':
                    print(df.coord, resID, 'blank')
                    continue

                # Re-fetch the same atom through the chain/residue path.
                chain = STR[0][df.chain]
                atom = chain[atomID][atom.get_name()]
                # The search tree over all atoms of model 0 is rebuilt for
                # every matching atom.
                atom_list = PDB.Selection.unfold_entities(STR[0], 'A')
                ns = PDB.NeighborSearch(atom_list)
                atoms = ns.search(atom.get_coord(), 2.8, level='A')
                # print(atoms)
                # coord label, residue name, the four full-id fields, then
                # whatever get_environment returns for the neighbors.
                envComp = df.coord + "," + df.resName + "," + ",".join(
                    map(str, list(resID))) + "," + get_environment(atoms)

    # print(envComp)
    process_queue.q.put(envComp)
    return envComp
Ejemplo n.º 17
0
def Collision_Check(model_atom_list, addition_atom_list, radius):
    """Return the collisions between the added atoms and the model.

    Input:
    -model_atom_list = list of atoms taken as reference for the collisions
    -addition_atom_list = list of atoms being checked against the model
    -radius = search radius around each added atom
    Output:
    -collisions_list = list of tuples of three strings: the sequence number
     of the added atom's residue, the sequence number of the model residue
     found within the radius, and the coordinates of the added atom
    """
    model_ns = pdb.NeighborSearch(model_atom_list)

    collisions_list = []
    for probe in addition_atom_list:
        # level "R": the search returns residues, not atoms
        for model_residue in model_ns.search(probe.get_coord(), radius, "R"):
            collisions_list.append((
                str(probe.get_parent().get_id()[1]),
                str(model_residue.get_id()[1]),
                str(probe.get_coord()),
            ))

    return collisions_list
Ejemplo n.º 18
0
        STR = get_cif_STR(cifPath)

        try:

            RES = STR[0][chain][("H_" + LIG, int(resID), " ")]

        except Exception as e:
            print("  ", i.Microenvironment_ID, ("H_" + LIG, int(resID), " "))
            print(e)
            continue

        resName = RES.get_resname()
        feATOMS = [atom for atom in RES.get_atoms() if "FE" in atom.get_name()]

        atom_list = PDB.Selection.unfold_entities(STR, "A")
        ns = PDB.NeighborSearch(atom_list)

        for atom in feATOMS:
            atomName = atom.get_name()
            feAtom = {
                "ligName": resName,
                "ligChain": chain,
                "resID": resID,
                "ligAtom": atomName,
                "Metal": i.Metal,
                "ligCoords": atom.get_coord()
            }
            annotations = {attr: getattr(i, attr) for attr in hagaiNames}
            neighbors = ns.search(atom.get_coord(), 3, level='A')
            neigh_dict = {
                i.Microenvironment_ID: {
Ejemplo n.º 19
0
 directory = sys.argv[1] # Folder with the chain pdbs
 distance = float(sys.argv[2])
 directory_out = os.path.join(directory, 'pairs') # Folder where pairs will be saved
 PDBparser = PDB.PDBParser(QUIET=True)
 done = []
 for pdb_1 in list(filter(lambda x: x.endswith('.pdb'), os.listdir(directory))):
     for pdb_2 in list(filter(lambda x: x.endswith('.pdb'), os.listdir(directory))):
         if (pdb_1 == pdb_2):   
             continue # Not to duplicate itself
         elif not (pdb_1, pdb_2) in done or not (pdb_2, pdb_1) in done: # Not to duplicate pairs
             done.append((pdb_1, pdb_2))
             structure1 = PDBparser.get_structure(pdb_1[:-4], os.path.join(directory,pdb_1))
             structure2 = PDBparser.get_structure(pdb_2[:-4], os.path.join(directory,pdb_2))
             for chain in structure2.get_chains():
                 atoms = list(chain.get_atoms()) 
                 ns = PDB.NeighborSearch(atoms) # An object to search chains near an atom 
                 for target_atom in structure1.get_atoms():
                     near = ns.search(target_atom.coord, distance)
                     if(near and (pdb_1[-5:-4],pdb_2[-5:-4]) not in done and (pdb_2[-5:-4],pdb_1[-5:-4]) not in done): # If there's an atom near, they interact
                         print('Joining chain {a} and {b}'.format(a=pdb_1[-5:-4], b=pdb_2[-5:-4]))
                         done.append((pdb_1[-5:-4],pdb_2[-5:-4]))
                         io = PDB.PDBIO()
                         structure1[0].child_list += [chain for chain in structure2.get_chains()] # Joins all the chains
                         io.set_structure(structure1)
                         if(not os.path.exists(os.path.join(directory_out))):
                             os.mkdir(os.path.join(directory_out))
                         p = re.search('^[A-Za-z0-9]+_([A-Za-z0-9]+).pdb$', pdb_1 ).group(1)
                         q = re.search('^[A-Za-z0-9]+_([A-Za-z0-9]+).pdb$', pdb_2 ).group(1)
                         io.save(os.path.join(directory_out,pdb_1[:-5]+p+q+'.pdb'))
                         print(os.path.join(directory,pdb_1[:-4]+'.fa'))
                         if(os.path.exists(os.path.join(directory,pdb_1[:-4]+'.fa'))) and os.path.exists(os.path.join(directory,pdb_2[:-4]+'.fa')):
Ejemplo n.º 20
0
def process_queue(id):
    """
    Collect the neighbors of every FE atom of one ligand residue.

    ``id`` is a microenvironment id that splits on '.' and '_' into
    ``pdb, LIG, chain, resID, function``. The mmCIF file ``<pdir>/<pdb>.cif``
    is loaded with ``get_cif_STR`` and the hetero residue
    ``("H_" + LIG, int(resID), " ")`` of ``chain`` in model 0 is looked up.
    For each atom of that residue whose name contains "FE", a dict is built
    with the ligand atom description and, for every atom within 3 of it
    (excluding atoms at distance 0), the keys ``N_<k>_resName``,
    ``N_<k>_atomName``, ``N_<k>_resNum``, ``N_<k>_x/_y/_z``, ``N_<k>_dist``
    and ``N_<k>_dx/_dy/_dz`` with k = 0, 1, 2, ...

    The list of dicts is put on ``process_queue.q`` and returned. If the
    residue cannot be found, the problem is printed and an empty list is
    queued and returned.

    :param id: Microenvironment id, also used to select the row(s) of the
               module-level ``feDF`` table.
    :return: A list with one dict per FE atom.
    """
    series = feDF.loc[feDF['Microenvironment_ID'] == id].to_dict()

    pdb, LIG, chain, resID, _function = re.split('[._]', id)
    cifPath = os.path.join(pdir, pdb + ".cif")
    STR = get_cif_STR(cifPath)

    output = []

    try:
        RES = STR[0][chain][("H_" + LIG, int(resID), " ")]
    except Exception as e:
        # resID is printed as parsed so the handler cannot fail on int().
        print("  ", series['Microenvironment_ID'], ("H_" + LIG, resID, " "))
        print(e)
        # Without the residue there is nothing to analyse; report an empty
        # result instead of failing later on the unbound RES.
        process_queue.q.put(output)
        return output

    resName = RES.get_resname()
    feATOMS = [atom for atom in RES.get_atoms() if "FE" in atom.get_name()]

    atom_list = PDB.Selection.unfold_entities(STR[0], "A")
    ns = PDB.NeighborSearch(atom_list)

    for atom in feATOMS:
        center = atom.get_coord()
        feAtom = {
            "id_hagai": id,
            "ligName": resName,
            "ligChain": chain,
            "resID": resID,
            "ligAtom": atom.get_name(),
            "Metal": list(series['Metal'].values())[0],
            "ligX": center[0],
            "ligY": center[1],
            "ligZ": center[2]
        }
        neighbors = ns.search(center, 3, level='A')
        c = 0  # running index of the neighbors actually recorded
        for neigh in neighbors:
            dist = neigh - atom
            if dist == 0:
                continue  # the FE atom itself (or a coincident atom)
            prefix = "N_" + str(c)
            coord = neigh.get_coord()
            feAtom[prefix + "_resName"] = neigh.get_parent().get_resname()
            feAtom[prefix + "_atomName"] = neigh.get_name()
            feAtom[prefix + "_resNum"] = neigh.get_parent().get_id()[1]
            feAtom[prefix + "_x"] = coord[0]
            feAtom[prefix + "_y"] = coord[1]
            feAtom[prefix + "_z"] = coord[2]
            feAtom[prefix + "_dist"] = dist
            feAtom[prefix + "_dx"] = coord[0] - center[0]
            feAtom[prefix + "_dy"] = coord[1] - center[1]
            feAtom[prefix + "_dz"] = coord[2] - center[2]
            # The original incremented an undefined ``count`` here, so the
            # index never advanced and the first neighbor raised.
            c += 1
        output.append(feAtom)

    process_queue.q.put(output)
    return output
Ejemplo n.º 21
0
def is_steric_clash(structure, rotating_chain, distance_for_clash=2.5):
    """Check whether a rotating chain clashes with the current structure.

    Keyword arguments:
    structure -- whole structure
    rotating_chain -- chain to be analyzed with respect to structure
    distance_for_clash -- threshold to consider clash between atoms. Default = 2.5

    Returns a tuple (value, chains):
    (False, None) -- no clash (20 or fewer atom contacts were found)
    (1, ids) -- clash against a different chain
    (2, ids) -- clash against what is considered the same chain
    where ids is the set of ids of the structure chains involved.

    Clash criteria: more than 20 atom contacts closer than distance_for_clash
    Same chain criteria: a single clashing chain whose id matches the
    rotating chain's id at index 1, and an RMSD between their common
    residues <= 3.0 """

    searcher = pdb.NeighborSearch(list(structure.get_atoms()))

    clashing_chains = set()  # ids of the structure chains that are hit
    n_clashes = 0  # total number of atom contacts

    for atom in rotating_chain.get_atoms():
        for hit in searcher.search(atom.get_coord(), distance_for_clash):
            clashing_chains.add(hit.get_parent().get_parent().id)
            n_clashes += 1

    # too few contacts: not a clash at all
    if n_clashes <= 20:
        return False, None

    # several chains are hit: certainly a clash with different chains
    if len(clashing_chains) > 1:
        return 1, clashing_chains

    # exactly one chain is hit from here on
    only_chain_id = list(clashing_chains)[0]
    if rotating_chain.id[1] != only_chain_id[1]:
        return 1, clashing_chains

    # MAYBE the chain is being superimposed on the place it already occupies
    clash_chain = structure[0][only_chain_id]
    residues_clash = list(clash_chain.get_residues())
    residues_rotating = list(rotating_chain.get_residues())

    # residues common to both chains, seen from each side
    shared_in_clash = get_list_of_common_res(residues_clash, residues_rotating)
    shared_in_rotating = get_list_of_common_res(residues_rotating, residues_clash)

    # coordinates of the atoms of the common residues
    coords_clash = np.array([
        list(a.get_coord())
        for a in get_atom_list_from_res_list(shared_in_clash)
    ])
    coords_rotating = np.array([
        list(a.get_coord())
        for a in get_atom_list_from_res_list(shared_in_rotating)
    ])

    if len(coords_clash) == len(coords_rotating):
        RMSD = rmsd.kabsch_rmsd(coords_clash, coords_rotating)
    else:
        RMSD = 1000  # different atom counts: treat as different

    if RMSD <= 3.0:
        return 2, clashing_chains  # it is the same chain

    # another chain, or the same one with a different structure
    return 1, clashing_chains
Ejemplo n.º 22
0
    def count_receptor_contacts(cls, paths, complexname, receptor_chain,
                                ligand_chain, nwindows, dbf, model_db_file,
                                query_dict):
        """
        Count number of paths contacting each receptor

        For every model file of every path, residue pairs closer than 5.0
        are searched; pairs joining the ligand chain with another chain
        mark the receptor residue as contacted by that path. The score of a
        path is the sum, over the receptor residues it contacts, of the
        number of paths contacting that residue. Scores are written to
        ``<pdbwindowid>_receptor_occupancy.csv`` next to ``model_db_file``
        (columns ``pathid``, ``occupancyscore``).

        Nothing is computed if both output files already exist.
        NOTE(review): the ``*_path_contacts.pdb`` file is not written by
        this method (the save step is disabled), so that shortcut depends
        on the file being produced elsewhere -- confirm.

        :param paths: DataFrame with a ``pathsid`` column.
        :param complexname: Complex name, used in the output file names.
        :param receptor_chain: Receptor chain id(s); membership is tested
                               with ``in`` and element 0 is used below.
        :param ligand_chain: Ligand chain id.
        :param nwindows: Number of windows, used in the output file names.
        :param dbf: Forwarded to ``cls.get_paths``.
        :param model_db_file: Forwarded to ``cls.get_paths``; its directory
                              receives the output.
        :param query_dict: Forwarded to ``cls.get_paths``.
        :raises PlotPathsError: If a model has no atoms or a contact pair
                                has no receptor chain.
        """

        wd = os.path.dirname(model_db_file)
        pdb_kwargs = dict(complexname=complexname,
                          receptor_chain=receptor_chain,
                          ligand_chain=ligand_chain,
                          nwindows=nwindows)
        pdbwindowid = "{complexname}{receptor_chain}{ligand_chain}{nwindows}".format(
            **pdb_kwargs)
        outfile = os.path.join(wd, "{0}_path_contacts.pdb".format(pdbwindowid))
        path_score_file = os.path.join(
            wd, "{0}_receptor_occupancy.csv".format(pdbwindowid))
        if not shared.missing(outfile) and not shared.missing(path_score_file):
            logging.debug("%s exists", outfile)
            return

        cutoff = 5.0

        residue_fmt = "{chain}_{resname}{resid[1]}"

        def make_key(residue):
            __, __, chainid, residueid = residue.get_full_id()
            return residue_fmt.format(chain=chainid,
                                      resname=residue.get_resname(),
                                      resid=residueid)

        # Drop window1 (modelid) column
        orig_window_vars = [
            x for x in paths.columns.values.tolist() if x.startswith("window")
        ]
        for window_var in orig_window_vars:
            paths = paths.drop(window_var, axis=1)
        # Get model filepaths for paths
        filepaths = cls.get_paths(paths[['pathsid']],
                                  dbf=dbf,
                                  model_db_file=model_db_file,
                                  query_dict=query_dict)
        window_vars = [
            x for x in filepaths.columns.values.tolist()
            if x.startswith("window")
        ]

        def get_files(row):
            return [row[w] for w in window_vars]

        parser = PDB.PDBParser(QUIET=True)

        def get_structure(path):
            # Remove hydrogens
            return parser.get_structure(
                os.path.splitext(os.path.basename(path))[0],
                shared.strip_h(path))

        modelid = 0

        receptor_contacts = collections.defaultdict(set)
        for _, row in filepaths.iterrows():
            pathsid = row['pathsid']
            path_files = get_files(row)
            for fn in path_files:
                structure = get_structure(fn)
                atoms = [
                    atom for chain in structure[modelid] for residue in chain
                    for atom in residue
                ]
                if not atoms:
                    raise PlotPathsError("No atoms in %s" % fn)
                ns = PDB.NeighborSearch(atoms)
                search = ns.search_all(radius=cutoff, level="R")
                for res1, res2 in search:
                    __, __, c1, r1 = res1.get_full_id()
                    __, __, c2, r2 = res2.get_full_id()
                    # Skip if chains are both ligand or both receptor
                    if (c1 == ligand_chain) == (c2 == ligand_chain):
                        continue
                    if c1 in receptor_chain:
                        key = make_key(res1)
                    elif c2 in receptor_chain:
                        key = make_key(res2)
                    else:
                        raise PlotPathsError("Neither %s nor %s is receptor" %
                                             (c1, c2))
                    receptor_contacts[key].add(pathsid)
        # Convert from defaultdict to normal dict
        receptor_contacts = dict(receptor_contacts)

        # Count paths contacting each receptor residue
        emptyset = set()
        # Chains have been combined
        r_ch = receptor_chain[0]
        # Deliberately using last structure from loop
        for residue in structure[modelid][r_ch]:
            key = make_key(residue)
            mypaths = receptor_contacts.get(key, emptyset)
            count = len(mypaths)
            for atom in residue:
                atom.set_bfactor(count)

        # Write out structure with b-factor
        #structure[modelid].detach_child(ligand_chain)
        #io = PDB.PDBIO()
        #io.set_structure(structure)
        #io.save(outfile)

        # Count receptor contacts for each path
        path_score_dict = collections.defaultdict(int)
        # values() works on Python 2 and 3 (itervalues() is Python 2 only)
        for contacts in receptor_contacts.values():
            n_contacts = len(contacts)
            for pathid in contacts:
                path_score_dict[pathid] += n_contacts
        # Materialise the items: a Python 3 dict view is not a sequence.
        path_score_df = pd.DataFrame(list(path_score_dict.items()),
                                     columns=["pathid", "occupancyscore"])
        path_score_df.to_csv(path_score_file, index=False)
 def __init__(self, protein_pdbpath: str, distance: float = 1.5):
     """
     Parse a PDB file and index all of its atoms for neighbor searches.

     :param protein_pdbpath: Path of the PDB file to load.
     :param distance: Value stored as ``self.radius``.
     """
     pdb_parser = PDB.PDBParser(QUIET=True, PERMISSIVE=True)
     structure = pdb_parser.get_structure('protein', protein_pdbpath)
     all_atoms = list(structure.get_atoms())
     self.kd = PDB.NeighborSearch(all_atoms)
     self.radius = distance
Ejemplo n.º 24
0
# Per-residue descriptors for structure/model ``s`` (defined earlier in the
# script). Each Bio.PDB property object exposes its results through the
# property_dict / property_keys / property_list attributes read below.

# HSExposureCB results
HSEB = PDB.HSExposureCB(s)

HSEB_dict = HSEB.property_dict
HSEB_keys = HSEB.property_keys
HSEB_list = HSEB.property_list

# ResidueDepth results
depth = PDB.ResidueDepth(s)
dep_dict = depth.property_dict
dep_keys = depth.property_keys
dep_list = depth.property_list

# DSSP results, computed from the file "3skpFH.pdb"
dssp = PDB.DSSP(s, "3skpFH.pdb")
dssp_dict = dssp.property_dict

# For every atom in ca_list, store the atoms of ca_list found within a
# radius of 8, keyed by (chain id, residue id) of the atom's residue.
# The result of each search includes whatever lies at distance 0 as well.
nb_dict = {}
nb = PDB.NeighborSearch(ca_list)
for a in ca_list:
    t = nb.search(a.get_coord(), 8)
    aa = a.get_parent()
    aa_id = (aa.get_parent().get_id(), aa.get_id())
    nb_dict[aa_id] = t

# Column-oriented table: one list per column, one entry per residue of
# aa_list (defined earlier in the script).
dic = {}

dic["res_id"] = []
for a in aa_list:
    dic["res_id"].append(a.get_id())

dic["res_name"] = []
for a in aa_list:
    dic["res_name"].append(a.get_resname())
Ejemplo n.º 25
0
def calculate_sphere_variance(structure, chain, md_df, mapping):
    """
    Calculates the sphere variance and returns the corresponding statistic
    Works on a single seq/structure

    For every sequence position that maps onto a residue with a CA atom, the
    residues within ``radius`` of that CA are collected. Neighbors that are
    hetero residues, have a negative sequence number, lie closer than
    ``exclude`` positions along the sequence, or are not part of ``mapping``
    are ignored, as are neighbors without ``sensitivity`` / ``fake``
    attributes. The variance of the remaining sensitivities is accumulated
    per GO term and, when more than one position contributed, averaged and
    compared against the averaged variances of the ``fake`` values.

    Relies on the module-level names ``radius``, ``exclude`` and ``bs``
    (``bs`` is the length of the per-GO ``fake`` vectors - presumably the
    number of bootstrap samples; confirm against the caller).

    :param structure: Bio.PDB structure object
    :param chain: Chain ID as str
    :param md_df: Dataframe containing sensitivity (masked_dump file)
    :param mapping: Dict mapping sensitivity coordinates to PDB coordinates
    :return: A data frame containing the statistics per GO
    """
    # Sequence length; the first row of md_df is not a sequence position.
    seq_len = len(md_df['AA'][1:].tolist())
    # GO terms are the column prefixes up to the first underscore.
    gos = {c.split('_')[0] for c in md_df.columns if c.startswith('GO:')}
    allowed = set(mapping.values())

    residues = structure[0][chain]

    sphere_variances_per_go = {go: [] for go in gos}
    mean_variances = {go: 0. for go in gos}
    mean_fakes = {go: np.zeros(bs) for go in gos}
    p_vals = {}
    emp_vals = {}
    bs_mean = {}
    bs_median = {}

    def mark_missing():
        # Positions without a usable sphere get NaN for every GO term.
        for go in gos:
            sphere_variances_per_go[go].append(np.nan)

    # The KD-tree only depends on the structure, so it is built once (on
    # first use) instead of once per residue.
    search = None
    n_clean = 0
    for seq_idx in range(-1, seq_len):
        # -1 is the placeholder for the skipped first row of md_df.
        if seq_idx == -1 or seq_idx not in mapping:
            mark_missing()
            continue

        try:
            res = residues[mapping[seq_idx]]
            ca = res['CA']
        except KeyError:
            # Residue absent from the chain, or residue without a CA atom.
            mark_missing()
            continue

        # get the neighbors:
        glob_resseq = mapping[seq_idx][1]
        center = ca.get_coord()
        if search is None:
            search = pdb.NeighborSearch(atom_list=list(structure.get_atoms()))
        neighbors = search.search(center=center, radius=radius, level="R")

        n_clean_neighbors = 0
        imps = {go: [] for go in gos}
        fakes = {go: [] for go in gos}
        for n in neighbors:
            het_atm, resseq, _ = n.id
            if het_atm.strip() != '' or resseq < 0:
                continue
            if abs(resseq - glob_resseq) < exclude or n.id not in allowed:
                continue
            for go in gos:
                try:
                    imps[go].append(n.sensitivity[go])
                    fakes[go].append(n.fake[go])
                    n_clean_neighbors += 1
                except AttributeError:
                    # Residue carries no sensitivity/fake data: skip it.
                    break

        if n_clean_neighbors == 0:
            mark_missing()
            continue

        for go in gos:
            current_var = np.nanvar(imps[go])
            current_fake = np.nanvar(fakes[go], axis=0)
            sphere_variances_per_go[go].append(current_var)
            mean_variances[go] += np.nan_to_num(current_var)
            mean_fakes[go] += np.nan_to_num(current_fake)

        n_clean += 1

    # normalize the variances:
    if n_clean > 1:
        for go in gos:
            mean_variances[go] /= n_clean
            mean_fakes[go] /= n_clean
            # do the test
            _, p_vals[go] = stats.ttest_1samp(mean_fakes[go],
                                              popmean=mean_variances[go],
                                              nan_policy="omit")
            emp_vals[go] = (
                1 + np.sum(mean_fakes[go] <= mean_variances[go])) / (1 + bs)
            bs_mean[go] = np.nanmean(mean_fakes[go])
            bs_median[go] = np.nanmedian(mean_fakes[go])

    # now convert the sphere vars to df and append:
    # sphere_variances_df = pd.DataFrame.from_dict(sphere_variances_per_go, orient='columns')
    # sphere_variances_df.columns = ['%s_%s_svar' % (c, sens_to_use) for c in sphere_variances_per_go] # as the keys were go terms
    # md_df_svar = pd.concat([md_df, sphere_variances_df], axis=1)

    record = {
        "mean_variances": mean_variances,
        "p_vals": p_vals,
        "emp_vals": emp_vals,
        "bs_mean": bs_mean,
        "bs_median": bs_median
    }
    df_svar = pd.DataFrame.from_records(record)
    return df_svar
def build_complex(threshold, distance, stoichiometry, sequences, structures, verbose, initial):
    '''
        This is the core function of the program.

        Takes a list of structures and tries to join them in a single model, taking into account
        different parameters, such as an initial model, a threshold for the pairwise alignments and
        the distance to consider that two chains are in the same position and it must be discarded
        (different atoms cannot occupy the same space)

        The model is seeded with ``initial`` when given, otherwise with the first structure.
        Every other structure is superimposed onto the growing model; its new chains are kept
        when fewer than 10 of their atoms have a model atom within ``distance`` and the
        superimposition rms is below 0.05. The loop stops when every structure has been used
        (no stoichiometry), when the stoichiometry is fulfilled, or when a structure has failed
        three times without anything being added in between.

        :param threshold: minimum score (exclusive) for both the structure alignments and the
                          chain-vs-sequence identity used for the stoichiometry
        :param distance: search radius used to detect clashing atoms
        :param stoichiometry: mapping from sequence id to the wanted number of chains;
                              a falsy value disables the stoichiometry handling
        :param sequences: sequence records exposing ``id`` and ``seq``
        :param structures: list of structures to join (at least two)
        :param verbose: print progress messages when true
        :param initial: optional structure used as starting model
        :return: the structure holding the assembled model
        :raises ValueError: if fewer than two structures are given
    '''
    # Acceptance limits for a superimposed structure
    max_clashes = 10
    max_rms = 0.05

    seqs = get_fastas_from_structs(structures, sequences)

    # Initializing an empty structure to save the model
    full_structure = PDB.Structure.Structure('full')
    full_structure.add(PDB.Model.Model('model'))
    model = next(full_structure.get_models())
    # We will need to assign ids. For now is limited to up to 64 different chains, but more single characters can be added
    ids = _chain_id()
    # Prepare a dict that saves how many chains of every one in the stoichiometry has the model in construction
    if stoichiometry:
        current_number_of_chains = {chain_id: 0 for chain_id in set(stoichiometry)}
    # We need at least 2 pdbs with an interaction. Can be the same one repeated. This is done to avoid trouble
    if len(structures) < 2:
        raise ValueError('Needed at least 2 pdbs to superpose')
    failed = []
    current = 0
    done = []

    def stoichiometry_fulfilled():
        # True once every stoichiometry entry has exactly its wanted count.
        return all(current_number_of_chains[key] == stoichiometry[key]
                   for key in current_number_of_chains)

    def claim_slot(chain_sequence, check_fasta=False):
        # Count the chain for the first sequence it matches that still has
        # room in the stoichiometry; report whether such a sequence exists.
        # A chain is counted once, so it cannot be added more than once.
        for seq in sequences:
            alignment = pairwise2.align.globalxx(chain_sequence, seq.seq, one_alignment_only=True)
            if check_fasta and seq.id not in current_number_of_chains:
                # The sequence chain is not in the stoichiometry
                raise errors.chain_in_stoic_not_in_fasta(seq.id)
            identity = alignment[0][2] / len(alignment[0][0])
            if (identity > threshold and
                    current_number_of_chains[seq.id] < stoichiometry[seq.id]):
                current_number_of_chains[seq.id] += 1
                return True
        return False

    def seed_model(source, source_seqs, check_fasta):
        # Copy the chains of `source` into the empty model with sequential
        # ids (A->B-> etc). Each chain is added exactly once; it is merely
        # counted against the stoichiometry when it matches a sequence.
        # Returns True as soon as the stoichiometry is fulfilled.
        seqs[full_structure.id] = {}
        for chain in source.get_chains():
            chain_id = next(ids)  # Selecting the next id available
            seqs[full_structure.id][chain_id] = source_seqs[source.id][chain.id]
            chain_new = PDB.Chain.Chain(chain_id)
            chain_new.child_list = list(chain.get_residues())
            model.child_list.append(chain_new)
            if (stoichiometry
                    and claim_slot(_get_chain_sequence(chain_new), check_fasta)
                    and stoichiometry_fulfilled()):
                print('Stoichiometry fullfilled!')
                return True
        return False

    # If an initial structure was given, introduce it into the model
    if initial:
        if verbose:
            print('Initializing complex')
        # Obtaining the sequences of the chains
        init_seqs = get_fastas_from_structs([initial], sequences)
        if seed_model(initial, init_seqs, check_fasta=False):
            return full_structure
        done.append(initial.id)
    if verbose:
        print('Starting to build')
    # The main loop of the function
    while True:
        if verbose:
            print('Loop #%i' % current)
            print('Current number of chains: %i' % len(list(full_structure.get_chains())))
        # All the structures where added correctly (should only be for non stoichiometric uses)
        if not stoichiometry and len(done) == len(structures):
            break
        structure = structures[current % len(structures)]
        if structure.id in done:
            current += 1
            continue  # If is already in, continue to the next one
        # If a structure has failed three times since the last successful
        # addition, no more chains can be added: the model stays equal no
        # matter which of the remaining structures is tried. Stop here.
        if any(failed.count(element) == 3 for element in failed):
            if verbose:
                print('Some pdbs could not be joined.\nAnd endless loop started and no more chains could be added\nFinishing the model in the current state')
            break
        if len(list(full_structure.get_chains())) == 0:
            if verbose:
                print('Initializing complex')
            if seed_model(structure, seqs, check_fasta=True):
                return full_structure
            done.append(structure.id)
            current += 1
            continue
        # The Ensemble class keeps code ordered and abstracts the alignment
        # and superimposition of the code
        pair = Ensemble(full_structure, structure)
        pair_seqs = (seqs[full_structure.id], seqs[structure.id])
        alignments = pair.get_best_alignment(pair_seqs)
        for align in alignments:
            # Only alignments scoring above the threshold count as homologs
            if not align[1] > threshold:
                continue
            new_chains = pair.superimpose(align[0][0], align[0][1])
            neighbor = PDB.NeighborSearch(list(full_structure.get_atoms()))
            # An atom clashes when any model atom lies within `distance`;
            # every atom of the new chains is checked exactly once.
            clashes = 0
            for new_chain in new_chains:
                for atom in new_chain.get_atoms():
                    if neighbor.search(atom.get_coord(), distance):
                        clashes += 1
            if not (clashes < max_clashes and pair.rms < max_rms):
                continue  # Pdbs clash, try the next alignment
            # The chains can be added, so let's add them
            added_any = False
            for new_chain in new_chains:
                chain_id = next(ids)  # Choosing an id
                chain2 = PDB.Chain.Chain(chain_id)  # An empty chain which will add the residues to join
                chain2.child_list += list(new_chain.get_residues())
                if stoichiometry:  # Taking the stoichiometry into account
                    if not claim_slot(_get_chain_sequence(new_chain)):
                        continue
                    model.child_list.append(chain2)
                    added_any = True
                    if verbose:
                        print('Current number of chains: %i' % sum(current_number_of_chains.values()))
                    if stoichiometry_fulfilled():
                        if verbose:
                            print('Stoichiometry fullfilled!')
                        return full_structure
                else:
                    model.child_list.append(chain2)
                    added_any = True
            if added_any:
                failed = []
                if not stoichiometry:
                    # Recorded once per structure so that len(done) keeps
                    # counting structures, not chains.
                    done.append(structure.id)
            else:
                # Nothing was added, so this attempt counts as a failure;
                # otherwise the loop could never terminate.
                failed.append(structure.id)
            break
        else:
            # No alignment produced an acceptable superimposition
            failed.append(structure.id)
        current += 1
    return full_structure