コード例 #1
0
ファイル: PPI.py プロジェクト: cbxx/Phantom
    def _processPDB(self):
        """Collect the atoms of peptide and phosphate chains into a dataframe.

        Walks every chain of the first model in ``self._pdb``. A chain is
        tagged as a peptide chain when ``CaPPBuilder`` builds at least one
        polypeptide from it, and as a phosphate chain as soon as a residue
        named PTR, TPO or SEP is met in it. Atoms of non-water residues are
        recorded once the chain carries either tag. The atoms of a
        phospho-residue that has no ``P`` atom are dropped again.

        Results are stored on the instance:
            ``self._df``: one row per recorded atom.
            ``self._peptideChains``: chain id -> sequence (as a string) of
                the first polypeptide built for that chain.
            ``self._phosphateChains``: ids of the chains in which a
                PTR/TPO/SEP residue was seen.

        Raises:
            Exception: if no atom was recorded at all.
        """
        self._logger.info("Processing PDB")
        phospho_resnames = ('PTR', 'TPO', 'SEP')
        ppb = CaPPBuilder()
        d = []
        peptide_chains = {}
        phosphate_chains = set()
        # Loop over all chains of the first model
        for chain_idx, chain in enumerate(self._pdb[0]):
            isPeptideChain = False
            isPhosphateChain = False
            # try to create peptide sequence
            pp = ppb.build_peptides(chain)
            if pp:
                # tag chain as peptide chain
                isPeptideChain = True
                # str() is used instead of Seq.tostring(), which is not
                # available in recent Biopython releases.
                peptide_chains[chain.get_id()] = str(pp[0].get_sequence())
            # loop over residues in chain
            for residue in chain:
                resn = residue.get_resname()
                isPhosphoResidue = resn in phospho_resnames
                if isPhosphoResidue:
                    # Chain contains a phospho-residue; tag as phosphateChain
                    isPhosphateChain = True
                    phosphate_chains.add(chain.get_id())
                # process atoms only if residue is not water and is part of
                # a peptide or phospho chain
                if residue.get_id()[0] == 'W' or not (isPeptideChain or isPhosphateChain):
                    continue
                resi = residue.get_id()[1]
                inscode = residue.get_id()[2].strip()
                hasPhosphate = False
                n_added = 0  # rows appended for this residue

                for atom in residue:
                    # -1 marks "no van der Waals radius looked up"
                    vdw = -1
                    if isPeptideChain:
                        elem = atom.element
                        if elem:
                            # normalise the symbol to "X" / "Xx" before the lookup
                            elem = elem if len(elem)==1 else elem[0]+elem[1].lower()
                            vdw = self._periodicTable.GetRvdw(self._periodicTable.GetAtomicNumber(elem))
                    coords = atom.get_coord()
                    sn = atom.get_serial_number()
                    # append to dataframe
                    d.append((chain.get_id().strip(), chain_idx, resn, resi, inscode, sn, atom.get_name(), isPeptideChain, isPhosphateChain, coords[0], coords[1], coords[2], vdw))
                    n_added += 1

                    if isPhosphoResidue and atom.get_name().strip() == 'P' and len(coords)==3:
                        hasPhosphate = True

                if isPhosphoResidue and not hasPhosphate and n_added:
                    # Drop the residue again because it has no annotated
                    # phosphate. Guarding on n_added matters: slicing with
                    # "-0" would otherwise discard every row collected so far.
                    del d[-n_added:]

        if not d:
            raise Exception('No amino acids found.')
        # save list to dataframe
        # NOTE(review): chain_idx is an integer but is stored in a 1-byte
        # string field ('a1') - confirm this is intended.
        data = np.zeros((len(d), ), dtype=[('chain', 'a1'), ('chain_idx', 'a1'), ('resn', 'a3'), ('resi', 'i4'), ('inscode', 'a1'), ('sn', 'i4'), ('an', 'a4'), ('peptideChain', 'b'), ('phosphateChain', 'b'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('vdw', 'f4')])
        data[:] = d
        idf = pd.DataFrame(data)
        idf[['peptideChain', 'phosphateChain']] = idf[['peptideChain', 'phosphateChain']].astype('bool')
        self._df = idf
        self._peptideChains = peptide_chains
        self._phosphateChains = phosphate_chains
コード例 #2
0
def extract_seq_from_models(protien_name, file_name, fasta_to_write):
    """Write the one-letter sequence found in the head of a PDB file as FASTA.

    Only the leading part of ``file_name`` (up to and including its first
    ``TER`` record) is copied to a scratch file and parsed, which keeps
    memory use low for large multi-model files. Polypeptides are built with
    ``CaPPBuilder``; when several are found, the sequence of the last one is
    written (this matches the previous behaviour).

    :param protien_name: structure id; also used in the FASTA header
        (``>{protien_name}_seq``)
    :param file_name: path of the input PDB file
    :param fasta_to_write: path of the FASTA file to create or overwrite

    :raises ValueError: if no polypeptide could be built from the copied part
    """
    print("Working with.....:  %s" % (file_name,))
    temp_pdb_file = "_temp_pdb.pdb"
    try:
        # "w" rather than "a": leftovers from an interrupted earlier run must
        # not be parsed together with the current input.
        with open(temp_pdb_file, "w") as temp:
            with open(file_name) as ip:
                for line in ip:
                    temp.write(line)
                    # Stop at the first TER record. Testing only the first
                    # character would also stop at header records such as
                    # TITLE, before any atom has been copied.
                    if line.startswith("TER"):
                        break

        structure = PDBParser().get_structure(protien_name, temp_pdb_file)
        # Using CA-CA
        seq = None
        for pp in CaPPBuilder().build_peptides(structure):
            seq = pp.get_sequence()
        if seq is None:
            raise ValueError("No polypeptide could be built from %s" % (file_name,))

        # "w" truncates an existing file, so no separate (and failure-prone)
        # os.remove() of the output is needed.
        with open(fasta_to_write, "w") as fasta:
            fasta.write(">" + protien_name + "_seq\n")
            fasta.write(str(seq))
    finally:
        # Always clean up the scratch file, also when parsing fails.
        if os.path.exists(temp_pdb_file):
            os.remove(temp_pdb_file)
    print("Done, output file stored at:  %s" % (fasta_to_write,))
コード例 #3
0
    def __init__(self, model, radius=12.0, offset=0):
        """Compute the CA coordination number of every residue in a model.

        For each amino-acid residue that has a CA atom, the CA atoms of the
        other residues (taken from the polypeptides built by CaPPBuilder)
        that lie closer than ``radius`` are counted. The counts are handed
        to AbstractPropertyMap keyed by (chain id, residue id) and are also
        stored in each residue's ``xtra['EXP_CN']``.

        :param model: the model that contains the residues
        :type model: L{Model}

        :param radius: radius of the sphere (centred at the CA atom)
        :type radius: float

        :param offset: number of flanking residues (same polypeptide) that
                       are left out of the neighbour count
        :type offset: int

        """
        assert (offset >= 0)
        peptides = CaPPBuilder().build_peptides(model)
        count_by_key = {}
        residue_counts = []
        ordered_keys = []
        for peptide in peptides:
            for idx, residue in enumerate(peptide):
                if not (is_aa(residue) and residue.has_id('CA')):
                    continue
                centre = residue['CA']
                count = 0
                for other_peptide in peptides:
                    same_peptide = other_peptide is peptide
                    for other_idx, other in enumerate(other_peptide):
                        # flanking residues of the same polypeptide are ignored
                        if same_peptide and abs(idx - other_idx) <= offset:
                            continue
                        if not (is_aa(other) and other.has_id('CA')):
                            continue
                        # subtracting two atoms gives their distance
                        if other['CA'] - centre < radius:
                            count += 1
                key = (residue.get_parent().get_id(), residue.get_id())
                # Fill the 3 data structures
                count_by_key[key] = count
                residue_counts.append((residue, count))
                ordered_keys.append(key)
                # Add to xtra
                residue.xtra['EXP_CN'] = count
        AbstractPropertyMap.__init__(self, count_by_key, ordered_keys, residue_counts)
コード例 #4
0
ファイル: HSExposure.py プロジェクト: juliahi/biopython
    def __init__(self, model, radius=12.0, offset=0):
        """Count, per residue, the CA atoms within a sphere around its CA.

        Polypeptides are built from ``model`` with CaPPBuilder. Every
        amino-acid residue carrying a CA atom gets a neighbour count: the
        number of other such residues whose CA atom is closer than
        ``radius``. The result is passed to AbstractPropertyMap, keyed by
        (chain id, residue id), and written to ``xtra['EXP_CN']`` of each
        residue.

        :param model: the model that contains the residues
        :type model: L{Model}

        :param radius: radius of the sphere (centred at the CA atom)
        :type radius: float

        :param offset: number of flanking residues that are ignored in
                       the calculation of the number of neighbors
        :type offset: int

        """
        assert(offset >= 0)
        builder = CaPPBuilder()
        chains_of_residues = builder.build_peptides(model)
        exposure = {}
        exposure_items = []
        exposure_keys = []
        for pp_a in chains_of_residues:
            for pos_a, res_a in enumerate(pp_a):
                if not (is_aa(res_a) and res_a.has_id('CA')):
                    continue
                ca_a = res_a['CA']
                neighbours = 0
                for pp_b in chains_of_residues:
                    for pos_b, res_b in enumerate(pp_b):
                        # skip the residue's own sequence neighbourhood
                        if pp_a is pp_b and abs(pos_a - pos_b) <= offset:
                            continue
                        if not (is_aa(res_b) and res_b.has_id('CA')):
                            continue
                        distance = res_b['CA'] - ca_a
                        if distance < radius:
                            neighbours += 1
                entry_key = (res_a.get_parent().get_id(), res_a.get_id())
                # Fill the 3 data structures
                exposure[entry_key] = neighbours
                exposure_items.append((res_a, neighbours))
                exposure_keys.append(entry_key)
                # Add to xtra
                res_a.xtra['EXP_CN'] = neighbours
        AbstractPropertyMap.__init__(self, exposure, exposure_keys, exposure_items)
コード例 #5
0
ファイル: functions.py プロジェクト: Iris0802/protwis
    def __init__(self, model, radius, offset=0, hse_up_key='HSE_U', hse_down_key='HSE_D', angle_key=None, check_chain_breaks=False, 
                 check_knots=False, receptor=None, signprot=None):
        """
        Compute half-sphere exposure (HSE) for the residues of a model and,
        on top of that, collect close atom contacts ("clashes"), optional
        chain-break information and optional knot candidates.

        Attributes set by this method: ca_cb_list, clash_pairs, chain_breaks
        and (only when check_knots is true) remodel_resis.

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
        @type angle_key: string

        @param check_chain_breaks: if True, residue numbers of amino-acid residues
        that are not part of any polypeptide built by CaPPBuilder are stored in
        self.chain_breaks
        @type check_chain_breaks: bool

        @param check_knots: if true, PossibleKnots(receptor, signprot) is queried and
        matching residue ranges are stored in self.remodel_resis
        @type check_knots: bool

        @param receptor: passed to PossibleKnots; only used when check_knots is true

        @param signprot: passed to PossibleKnots; only used when check_knots is true
        """
        assert(offset>=0)
        # For PyMOL visualization
        self.ca_cb_list=[]
        ppb=CaPPBuilder()
        ppl=ppb.build_peptides(model)
        hse_map={}
        hse_list=[]
        hse_keys=[]
        ### GP
        # If the object passed in does not have id 0, continue with its child 0.
        # Note that the polypeptides above were already built from the object
        # as it was passed in.
        if model.get_id()!=0:
            model = model[0]
        residues_in_pdb,residues_with_proper_CA=[],[]
        if check_chain_breaks==True:
            # Collect the residue numbers of all amino-acid residues in the model.
            # for m in model:
                for chain in model:
                    for res in chain:
                        # try:
                            if is_aa(res):
                                residues_in_pdb.append(res.get_id()[1])
                        # except:
                        #     if is_aa(chain):
                        #         residues_in_pdb.append(chain.get_id()[1])
                        #         print('chain', chain, res)
                        #         break
        self.clash_pairs = []
        self.chain_breaks = []
        
        if check_knots:
            possible_knots = PossibleKnots(receptor, signprot)
            knot_resis = possible_knots.get_resnums()
            # chain id -> list of [first, last] residue-number ranges
            self.remodel_resis = {}

        for pp1 in ppl:
            for i in range(0, len(pp1)):
                # Every residue of a built polypeptide counts as "has a proper CA"
                # for the chain-break bookkeeping at the end of this method.
                residues_with_proper_CA.append(pp1[i].get_id()[1])
                if i==0:
                    r1=None
                else:
                    r1=pp1[i-1]
                r2=pp1[i]
                if i==len(pp1)-1:
                    r3=None
                else:
                    r3=pp1[i+1]
                # This method is provided by the subclasses to calculate HSE
                result=self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle=result
                hse_u=0
                hse_d=0
                ca2=r2['CA'].get_vector()
                residue_up=[]   ### GP
                residue_down=[] ### GP
                for pp2 in ppl:
                    for j in range(0, len(pp2)):
                        # Control flow via exceptions:
                        # - residue numbers around r2 are NOT contiguous -> "pass",
                        #   i.e. the offset rule below is not applied at all;
                        # - numbers are contiguous -> the explicit raise jumps to
                        #   the except branch, where the offset rule is applied;
                        # - the comparison itself raises (e.g. r1 or r3 is None)
                        #   -> the bare except also applies the offset rule.
                        # NOTE(review): in the "pass" case the residue itself
                        # (same polypeptide, i == j) is not skipped either -
                        # confirm this is intended.
                        try:
                            if r2.get_id()[1]-1!=r1.get_id()[1] or r2.get_id()[1]+1!=r3.get_id()[1]:
                                pass
                            else:
                                raise Exception
                        except:
                            if pp1 is pp2 and abs(i-j)<=offset:
                            # neighboring residues in the chain are ignored
                                continue
                        ro=pp2[j]
                        if not is_aa(ro) or not ro.has_id('CA'):
                            continue
                        cao=ro['CA'].get_vector()
                        d=(cao-ca2)
                        if d.norm()<radius:
                            # Angle to pcb below 90 degrees -> upper half sphere.
                            if d.angle(pcb)<(math.pi/2):
                                hse_u+=1
                                ### GP
                                # Collect the residues found in the upper half sphere
                                residue_up.append(ro)

                                ### end of GP code
                            else:
                                hse_d+=1
                                ### GP
                                # Collect the residues found in the lower half sphere
                                residue_down.append(ro)
                                ### end of GP code
                res_id=r2.get_id()
                chain_id=r2.get_parent().get_id()
                # Fill the 3 data structures
                # NOTE(review): hse_map, hse_list and hse_keys are filled here but
                # are not handed to any base-class initializer within this method.
                hse_map[(chain_id, res_id)]=(hse_u, hse_d, angle)
                hse_list.append((r2, (residue_up, residue_down, hse_u, hse_d, angle)))
                ### GP residue_up and residue_down added to hse_list
                hse_keys.append((chain_id, res_id))
                # Add to xtra
                r2.xtra[hse_up_key]=hse_u
                r2.xtra[hse_down_key]=hse_d
                if angle_key:
                    r2.xtra[angle_key]=angle

                ### GP checking for knots
                # Each knot is indexed as knot[0] = (chain id, residue number) and
                # knot[1] = (chain id, sequence of residue numbers).
                if check_knots:
                    for knot in knot_resis:
                        if knot[0][1]==pp1[i].get_id()[1] and knot[0][0]==pp1[i].get_parent().get_id():
                            print(pp1[i].get_parent().get_id(),pp1[i])
                            for r in residue_up:
                                if r.get_parent().get_id()==knot[1][0] and r.get_id()[1] in knot[1][1]:
                                    print('close: ', r.get_parent().get_id(),r)
                                    # Remember the [first, last] residue numbers of the
                                    # matching range once per chain.
                                    resi_range = [knot[1][1][0], knot[1][1][-1]]
                                    if knot[1][0] not in self.remodel_resis:
                                        self.remodel_resis[knot[1][0]] = [resi_range]
                                    else:
                                        if resi_range not in self.remodel_resis[knot[1][0]]:
                                            self.remodel_resis[knot[1][0]].append(resi_range)

                ### GP checking for atom clashes
                # include_prev / include_next become True when the residue before /
                # after pp1[i] in the polypeptide does not carry the adjacent
                # residue number; such a residue is then clash-checked as well.
                # For i == 0, pp1[i-1] is pp1[-1], i.e. the last residue.
                include_prev, include_next = False, False
                try:
                    if pp1[i].get_id()[1]-1!=pp1[i-1].get_id()[1]:
                        include_prev = True
                except:
                    include_prev = False
                try:
                    if pp1[i].get_id()[1]+1!=pp1[i+1].get_id()[1]:
                        include_next = True
                except:
                    # e.g. pp1[i+1] does not exist for the last residue
                    include_next = False
                for atom in pp1[i]:
                    ref_vector = atom.get_vector()
                    for other_res in residue_up:
                        # Exceptions as control flow again: sequence neighbours that
                        # are not flagged by include_prev/include_next are skipped;
                        # everything else (explicit raise, or an error raised by the
                        # checks themselves) ends up in the clash test below.
                        # NOTE(review): len(pp1)>=i+1 holds for every valid i, so it
                        # does not protect pp1[i+1] for the last residue; that
                        # IndexError is caught by the bare except.
                        try:
                            if other_res==pp1[i-1] and include_prev==False:
                                continue
                            elif len(pp1)>=i+1 and other_res==pp1[i+1] and include_next==False:
                                continue
                            else:
                                raise Exception
                        except:
                            for other_atom in other_res:
                                other_vector = other_atom.get_vector()
                                d = other_vector-ref_vector
                                # Two atoms closer than 2 (coordinate units) count as a clash.
                                if d.norm()<2:
                                    # NOTE(review): float(str(x)+'0') looks numerically
                                    # equal to x, so both branches appear to yield the
                                    # same value - confirm what the padding is for.
                                    if len(str(pp1[i]['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res1 = float(str(pp1[i]['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res1 = pp1[i]['CA'].get_bfactor()
                                    if len(str(other_res['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res2 = float(str(other_res['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res2 = other_res['CA'].get_bfactor()
                                    # Entry: [(CA b-factor, residue number)] for both residues
                                    self.clash_pairs.append([(clash_res1, pp1[i].get_id()[1]), (clash_res2, other_res.get_id()[1])])
        if check_chain_breaks==True:
            # Residue numbers seen in the model but in none of the polypeptides
            for r in residues_in_pdb:
                if r not in residues_with_proper_CA:
                    self.chain_breaks.append(r)
コード例 #6
0
    def __init__(self,
                 model,
                 radius,
                 offset,
                 hse_up_key,
                 hse_down_key,
                 angle_key=None):
        """
        Compute half-sphere exposure (HSE) for every residue for which the
        subclass-provided _get_cb() returns a direction vector, and hand the
        results to AbstractPropertyMap keyed by (chain id, residue id).

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
            the entity.xtra attribute
        @type angle_key: string
        """
        assert (offset >= 0)
        # For PyMOL visualization
        self.ca_cb_list = []
        peptides = CaPPBuilder().build_peptides(model)
        exposure_by_key = {}
        exposure_items = []
        exposure_keys = []
        for peptide in peptides:
            last = len(peptide) - 1
            for idx, residue in enumerate(peptide):
                previous = None if idx == 0 else peptide[idx - 1]
                following = None if idx == last else peptide[idx + 1]
                # This method is provided by the subclasses to calculate HSE
                cb_info = self._get_cb(previous, residue, following)
                if cb_info is None:
                    # Missing atoms, or idx==0, or idx==last
                    continue
                pcb, angle = cb_info
                n_up = 0
                n_down = 0
                origin = residue['CA'].get_vector()
                for other_peptide in peptides:
                    same_peptide = other_peptide is peptide
                    for other_idx, other in enumerate(other_peptide):
                        if same_peptide and abs(idx - other_idx) <= offset:
                            # neighboring residues in the chain are ignored
                            continue
                        if not (is_aa(other) and other.has_id('CA')):
                            continue
                        delta = other['CA'].get_vector() - origin
                        if not (delta.norm() < radius):
                            continue
                        # angle to pcb below 90 degrees -> upper half sphere
                        if delta.angle(pcb) < (pi / 2):
                            n_up += 1
                        else:
                            n_down += 1
                key = (residue.get_parent().get_id(), residue.get_id())
                # Fill the 3 data structures
                exposure_by_key[key] = (n_up, n_down, angle)
                exposure_items.append((residue, (n_up, n_down, angle)))
                exposure_keys.append(key)
                # Add to xtra
                residue.xtra[hse_up_key] = n_up
                residue.xtra[hse_down_key] = n_down
                if angle_key:
                    residue.xtra[angle_key] = angle
        AbstractPropertyMap.__init__(self, exposure_by_key, exposure_keys, exposure_items)
コード例 #7
0
ファイル: functions.py プロジェクト: pszgaspar/protwis
    def __init__(self, model, radius, offset=0, hse_up_key='HSE_U', hse_down_key='HSE_D', angle_key=None, check_chain_breaks=False, 
                 check_knots=False, receptor=None, signprot=None):
        """
        Compute half-sphere exposure (HSE) for the residues of a model and,
        on top of that, collect close atom contacts ("clashes"), optional
        chain-break information and optional knot candidates.

        Attributes set by this method: ca_cb_list, clash_pairs, chain_breaks
        and (only when check_knots is true) remodel_resis.

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
        @type angle_key: string

        @param check_chain_breaks: if True, residue numbers of amino-acid residues
        that are not part of any polypeptide built by CaPPBuilder are stored in
        self.chain_breaks
        @type check_chain_breaks: bool

        @param check_knots: if true, PossibleKnots(receptor, signprot) is queried and
        matching residue ranges are stored in self.remodel_resis
        @type check_knots: bool

        @param receptor: passed to PossibleKnots; only used when check_knots is true

        @param signprot: passed to PossibleKnots; only used when check_knots is true
        """
        assert(offset>=0)
        # For PyMOL visualization
        self.ca_cb_list=[]
        ppb=CaPPBuilder()
        ppl=ppb.build_peptides(model)
        hse_map={}
        hse_list=[]
        hse_keys=[]
        ### GP
        # If the object passed in does not have id 0, continue with its child 0.
        # Note that the polypeptides above were already built from the object
        # as it was passed in.
        if model.get_id()!=0:
            model = model[0]
        residues_in_pdb,residues_with_proper_CA=[],[]
        if check_chain_breaks==True:
            # Collect the residue numbers of all amino-acid residues in the model.
            # for m in model:
                for chain in model:
                    for res in chain:
                        # try:
                            if is_aa(res):
                                residues_in_pdb.append(res.get_id()[1])
                        # except:
                        #     if is_aa(chain):
                        #         residues_in_pdb.append(chain.get_id()[1])
                        #         print('chain', chain, res)
                        #         break
        self.clash_pairs = []
        self.chain_breaks = []
        
        if check_knots:
            possible_knots = PossibleKnots(receptor, signprot)
            knot_resis = possible_knots.get_resnums()
            # chain id -> list of [first, last] residue-number ranges
            self.remodel_resis = {}

        for pp1 in ppl:
            for i in range(0, len(pp1)):
                # Every residue of a built polypeptide counts as "has a proper CA"
                # for the chain-break bookkeeping at the end of this method.
                residues_with_proper_CA.append(pp1[i].get_id()[1])
                if i==0:
                    r1=None
                else:
                    r1=pp1[i-1]
                r2=pp1[i]
                if i==len(pp1)-1:
                    r3=None
                else:
                    r3=pp1[i+1]
                # This method is provided by the subclasses to calculate HSE
                result=self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle=result
                hse_u=0
                hse_d=0
                ca2=r2['CA'].get_vector()
                residue_up=[]   ### GP
                residue_down=[] ### GP
                for pp2 in ppl:
                    for j in range(0, len(pp2)):
                        # Control flow via exceptions:
                        # - residue numbers around r2 are NOT contiguous -> "pass",
                        #   i.e. the offset rule below is not applied at all;
                        # - numbers are contiguous -> the explicit raise jumps to
                        #   the except branch, where the offset rule is applied;
                        # - the comparison itself raises (e.g. r1 or r3 is None)
                        #   -> the bare except also applies the offset rule.
                        # NOTE(review): in the "pass" case the residue itself
                        # (same polypeptide, i == j) is not skipped either -
                        # confirm this is intended.
                        try:
                            if r2.get_id()[1]-1!=r1.get_id()[1] or r2.get_id()[1]+1!=r3.get_id()[1]:
                                pass
                            else:
                                raise Exception
                        except:
                            if pp1 is pp2 and abs(i-j)<=offset:
                            # neighboring residues in the chain are ignored
                                continue
                        ro=pp2[j]
                        if not is_aa(ro) or not ro.has_id('CA'):
                            continue
                        cao=ro['CA'].get_vector()
                        d=(cao-ca2)
                        if d.norm()<radius:
                            # Angle to pcb below 90 degrees -> upper half sphere.
                            if d.angle(pcb)<(math.pi/2):
                                hse_u+=1
                                ### GP
                                # Collect the residues found in the upper half sphere
                                residue_up.append(ro)

                                ### end of GP code
                            else:
                                hse_d+=1
                                ### GP
                                # Collect the residues found in the lower half sphere
                                residue_down.append(ro)
                                ### end of GP code
                res_id=r2.get_id()
                chain_id=r2.get_parent().get_id()
                # Fill the 3 data structures
                # NOTE(review): hse_map, hse_list and hse_keys are filled here but
                # are not handed to any base-class initializer within this method.
                hse_map[(chain_id, res_id)]=(hse_u, hse_d, angle)
                hse_list.append((r2, (residue_up, residue_down, hse_u, hse_d, angle)))
                ### GP residue_up and residue_down added to hse_list
                hse_keys.append((chain_id, res_id))
                # Add to xtra
                r2.xtra[hse_up_key]=hse_u
                r2.xtra[hse_down_key]=hse_d
                if angle_key:
                    r2.xtra[angle_key]=angle

                ### GP checking for knots
                # Each knot is indexed as knot[0] = (chain id, residue number) and
                # knot[1] = (chain id, sequence of residue numbers).
                if check_knots:
                    for knot in knot_resis:
                        if knot[0][1]==pp1[i].get_id()[1] and knot[0][0]==pp1[i].get_parent().get_id():
                            print(pp1[i].get_parent().get_id(),pp1[i])
                            for r in residue_up:
                                if r.get_parent().get_id()==knot[1][0] and r.get_id()[1] in knot[1][1]:
                                    print('close: ', r.get_parent().get_id(),r)
                                    # Remember the [first, last] residue numbers of the
                                    # matching range once per chain.
                                    resi_range = [knot[1][1][0], knot[1][1][-1]]
                                    if knot[1][0] not in self.remodel_resis:
                                        self.remodel_resis[knot[1][0]] = [resi_range]
                                    else:
                                        if resi_range not in self.remodel_resis[knot[1][0]]:
                                            self.remodel_resis[knot[1][0]].append(resi_range)

                ### GP checking for atom clashes
                # include_prev / include_next become True when the residue before /
                # after pp1[i] in the polypeptide does not carry the adjacent
                # residue number; such a residue is then clash-checked as well.
                # For i == 0, pp1[i-1] is pp1[-1], i.e. the last residue.
                include_prev, include_next = False, False
                try:
                    if pp1[i].get_id()[1]-1!=pp1[i-1].get_id()[1]:
                        include_prev = True
                except:
                    include_prev = False
                try:
                    if pp1[i].get_id()[1]+1!=pp1[i+1].get_id()[1]:
                        include_next = True
                except:
                    # e.g. pp1[i+1] does not exist for the last residue
                    include_next = False
                for atom in pp1[i]:
                    ref_vector = atom.get_vector()
                    for other_res in residue_up:
                        # Exceptions as control flow again: sequence neighbours that
                        # are not flagged by include_prev/include_next are skipped;
                        # everything else (explicit raise, or an error raised by the
                        # checks themselves) ends up in the clash test below.
                        # NOTE(review): len(pp1)>=i+1 holds for every valid i, so it
                        # does not protect pp1[i+1] for the last residue; that
                        # IndexError is caught by the bare except.
                        try:
                            if other_res==pp1[i-1] and include_prev==False:
                                continue
                            elif len(pp1)>=i+1 and other_res==pp1[i+1] and include_next==False:
                                continue
                            else:
                                raise Exception
                        except:
                            for other_atom in other_res:
                                other_vector = other_atom.get_vector()
                                d = other_vector-ref_vector
                                # Two atoms closer than 2 (coordinate units) count as a clash.
                                if d.norm()<2:
                                    # NOTE(review): float(str(x)+'0') looks numerically
                                    # equal to x, so both branches appear to yield the
                                    # same value - confirm what the padding is for.
                                    if len(str(pp1[i]['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res1 = float(str(pp1[i]['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res1 = pp1[i]['CA'].get_bfactor()
                                    if len(str(other_res['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res2 = float(str(other_res['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res2 = other_res['CA'].get_bfactor()
                                    # Entry: [(CA b-factor, residue number)] for both residues
                                    self.clash_pairs.append([(clash_res1, pp1[i].get_id()[1]), (clash_res2, other_res.get_id()[1])])
        if check_chain_breaks==True:
            # Residue numbers seen in the model but in none of the polypeptides
            for r in residues_in_pdb:
                if r not in residues_with_proper_CA:
                    self.chain_breaks.append(r)
コード例 #8
0
ファイル: functions.py プロジェクト: marionatf/protwis
    def __init__(self,
                 model,
                 radius,
                 offset=0,
                 hse_up_key='HSE_U',
                 hse_down_key='HSE_D',
                 angle_key=None):
        """
        Compute half-sphere exposure (HSE) for the residues of a model and
        record close atom contacts ("clashes") in self.clash_pairs.

        A clash is recorded when an atom of a residue lies closer than 2
        (coordinate units) to an atom of a residue found in its upper half
        sphere, unless that residue is the direct predecessor or successor
        in the same polypeptide. Each entry of self.clash_pairs has the form
        [(CA b-factor, residue number), (CA b-factor, residue number)].

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
        @type angle_key: string
        """
        assert (offset >= 0)
        # For PyMOL visualization
        self.ca_cb_list = []
        ppb = CaPPBuilder()
        ppl = ppb.build_peptides(model)
        hse_map = {}
        hse_list = []
        hse_keys = []
        ### GP
        self.clash_pairs = []
        for pp1 in ppl:
            last_index = len(pp1) - 1
            for i in range(0, len(pp1)):
                r1 = pp1[i - 1] if i > 0 else None
                r2 = pp1[i]
                r3 = pp1[i + 1] if i < last_index else None
                # This method is provided by the subclasses to calculate HSE
                result = self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle = result
                hse_u = 0
                hse_d = 0
                ca2 = r2['CA'].get_vector()
                residue_up = []  ### GP
                residue_down = []  ### GP
                for pp2 in ppl:
                    for j in range(0, len(pp2)):
                        if pp1 is pp2 and abs(i - j) <= offset:
                            # neighboring residues in the chain are ignored
                            continue
                        ro = pp2[j]
                        if not is_aa(ro) or not ro.has_id('CA'):
                            continue
                        cao = ro['CA'].get_vector()
                        d = (cao - ca2)
                        if d.norm() < radius:
                            if d.angle(pcb) < (math.pi / 2):
                                hse_u += 1
                                ### GP
                                # Collect the residues found in the upper half sphere
                                residue_up.append(ro)
                                ### end of GP code
                            else:
                                hse_d += 1
                                ### GP
                                # Collect the residues found in the lower half sphere
                                residue_down.append(ro)
                                ### end of GP code
                res_id = r2.get_id()
                chain_id = r2.get_parent().get_id()
                # Fill the 3 data structures
                # NOTE(review): hse_map, hse_list and hse_keys are filled here but
                # are not handed to any base-class initializer within this method.
                hse_map[(chain_id, res_id)] = (hse_u, hse_d, angle)
                hse_list.append(
                    (r2, (residue_up, residue_down, hse_u, hse_d, angle)))
                ### GP residue_up and residue_down added to hse_list
                hse_keys.append((chain_id, res_id))
                # Add to xtra
                r2.xtra[hse_up_key] = hse_u
                r2.xtra[hse_down_key] = hse_d
                if angle_key:
                    r2.xtra[angle_key] = angle

                ### GP checking for atom clashes
                # Sequence neighbours are excluded from the clash test. They are
                # looked up with explicit bounds checks: a blanket try/except
                # around pp1[i + 1] used to drop every clash of the last residue,
                # and pp1[i - 1] at i == 0 silently meant the last residue.
                seq_neighbours = [
                    r for r in (r1, r3) if r is not None
                ]
                clash_candidates = [
                    other_res for other_res in residue_up
                    if all(other_res != nb for nb in seq_neighbours)
                ]
                for atom in r2:
                    ref_vector = atom.get_vector()
                    for other_res in clash_candidates:
                        for other_atom in other_res:
                            separation = other_atom.get_vector() - ref_vector
                            if separation.norm() < 2:
                                self.clash_pairs.append([
                                    (r2['CA'].get_bfactor(),
                                     r2.get_id()[1]),
                                    (other_res['CA'].get_bfactor(),
                                     other_res.get_id()[1])
                                ])
コード例 #9
0
ファイル: HSExposure.py プロジェクト: BingW/biopython
    def __init__(self, model, radius, offset, hse_up_key, hse_down_key, 
            angle_key=None):
        """
        Calculate half-sphere exposure (HSE) per residue. The direction that
        splits the sphere comes from the subclass-provided _get_cb(); the
        results are passed to AbstractPropertyMap keyed by
        (chain id, residue id) and written to each residue's xtra dict.

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in 
            the entity.xtra attribute
        @type angle_key: string
        """
        assert(offset>=0)
        # For PyMOL visualization
        self.ca_cb_list=[]
        builder=CaPPBuilder()
        polypeptides=builder.build_peptides(model)
        values_by_key={}
        values_by_residue=[]
        key_order=[]
        for pp_a in polypeptides:
            final_pos=len(pp_a)-1
            for pos_a, centre_res in enumerate(pp_a):
                before=None if pos_a==0 else pp_a[pos_a-1]
                after=None if pos_a==final_pos else pp_a[pos_a+1]
                # This method is provided by the subclasses to calculate HSE
                outcome=self._get_cb(before, centre_res, after)
                if outcome is None:
                    # Missing atoms, or first/last position of the polypeptide
                    continue
                pcb, angle=outcome
                up_count=0
                down_count=0
                centre_ca=centre_res['CA'].get_vector()
                for pp_b in polypeptides:
                    for pos_b, other_res in enumerate(pp_b):
                        if pp_a is pp_b and abs(pos_a-pos_b)<=offset:
                            # neighboring residues in the chain are ignored
                            continue
                        if not (is_aa(other_res) and other_res.has_id('CA')):
                            continue
                        diff=other_res['CA'].get_vector()-centre_ca
                        if not (diff.norm()<radius):
                            continue
                        # angle to pcb below 90 degrees -> upper half sphere
                        if diff.angle(pcb)<(pi/2):
                            up_count+=1
                        else:
                            down_count+=1
                entry_key=(centre_res.get_parent().get_id(), centre_res.get_id())
                # Fill the 3 data structures
                values_by_key[entry_key]=(up_count, down_count, angle)
                values_by_residue.append((centre_res, (up_count, down_count, angle)))
                key_order.append(entry_key)
                # Add to xtra
                centre_res.xtra[hse_up_key]=up_count
                centre_res.xtra[hse_down_key]=down_count
                if angle_key:
                    centre_res.xtra[angle_key]=angle
        AbstractPropertyMap.__init__(self, values_by_key, key_order, values_by_residue)
コード例 #10
0
class ContactMapper(FeaturesComputer):
    '''
    Extends FeaturesComputer class. Extracts res and chainIds for training and predicting and computes contact maps 
    for training for a given complex
  '''
    def __init__(self,
                 rFname,
                 lFname,
                 computedFeatsRootDir=None,
                 boundAvailable=True,
                 res2res_dist=6.0,
                 isForPrediction=False,
                 statusManager=None):
        '''
        Stores the settings of the contact map computation and derives the naming prefix and the
        output file path from the receptor and ligand file names.

        @param rFname: str. path to receptor pdb file
        @param lFname: str. path to ligand pdb file
        @param computedFeatsRootDir: str. path where features will be stored
        @param boundAvailable: bool. True if bound structures are available. False otherwise. Bound structures must
                                     be located at the same path as the unbound ones and be named as in the
                                     following example: 1A2K_l_u.pdb  1A2K_r_b.pdb
        @param res2res_dist: float. max distance between any heavy atoms of 2 amino acids to be considered as
                                    interacting (Angstroms)
        @param isForPrediction: bool. If False, contacts between amino acids are computed (positive pairs are
                                      tagged 1, negative pairs -1). If True, every pair is tagged np.nan
        @param statusManager: class that implements .setStatus(msg) to communicate.
                              NOTE(review): this argument is not referenced anywhere in this constructor
        @raise FeatureComputerException: if receptor and ligand short prefixes differ and the ligand prefix
                                         contains '<' or the receptor prefix contains '>'
        '''
        FeaturesComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

        def shortPrefix(fname):
            # File name without directory, cut at the first "." and then at the first "_"
            return os.path.basename(fname).partition(".")[0].partition("_")[0]

        self.prefixR = shortPrefix(rFname)
        self.prefixL = shortPrefix(lFname)
        if self.prefixR != self.prefixL:
            # "<->" joins both prefixes below, so the short prefixes are checked for these characters first
            if "<" in self.prefixL:
                raise FeatureComputerException(
                    "Error. Ligand pdbFile name %s must not contain '<' or '>' character"
                    % lFname)
            if ">" in self.prefixR:
                raise FeatureComputerException(
                    "Error. Receptor pdbFile name %s must not contain '<' or'>' character"
                    % rFname)
            self.prefixR = self.getExtendedPrefix(rFname)
            self.prefixL = self.getExtendedPrefix(lFname)
            self.prefix = "<->".join((self.prefixL, self.prefixR))
        else:
            # Same short prefix for both partners: use it directly
            self.prefix = self.prefixR

        self.isForPrediction = isForPrediction
        self.res2res_dist = res2res_dist
        self.boundAvailable = boundAvailable
        self.outPath = myMakeDir(self.computedFeatsRootDir,
                                 "common/contactMaps")
        self.outName = os.path.join(self.outPath, self.prefix + ".cMap.tab")
        self.parser = PDBParser(QUIET=True)
        # An earlier variant used PPBuilder(radius=200) here so that broken chains were not a concern
        self.ppb = CaPPBuilder()
        # Entry point used to run the computation for one complex
        self.computeFun = self.contactMapOneComplex

    def mapBoundToUnbound(self,
                          structureUnbound,
                          structureBound,
                          skipBoundChainsIds=None):
        '''
        Obtains the correspondence between bound and unbound structure. Returns a dictionary
        that maps bound_residue --> equivalent unbound_residue

        @param structureUnbound: Bio.PDB.Structure. Structure in unbound state
        @param structureBound:   Bio.PDB.Structure or None. Structure in bound state. If None, the unbound
                                 structure plays both roles and every residue is mapped to itself
        @param skipBoundChainsIds: Collection of chain ids (taken from the bound residues) that will be skipped.
                                   None (default) means that no chain is skipped
        @return bound2UnboundMapDict: Dict {Bio.PDB.Residue (from bound structure): Bio.PDB.Residue (from unbound structure)}
        '''
        if skipBoundChainsIds is None:
            # Immutable empty default instead of a shared mutable set([]) in the signature
            skipBoundChainsIds = frozenset()

        pp_list_unbound = self.ppb.build_peptides(structureUnbound,
                                                  aa_only=False)
        if structureBound is None:
            # No bound structure available: iterate the unbound peptides and map each residue to itself
            pp_list_bound = pp_list_unbound

            def boundToUnboundMap(residue):
                return residue
        else:
            pp_list_bound = self.ppb.build_peptides(structureBound,
                                                    aa_only=False)
            mapper = BoundUnboundMapper(
                pp_list_unbound,
                pp_list_bound)  # res_bound->res_unbound mapper object
            mapper.build_correspondence()
            # For a given bound residue returns its unbound equivalent (None when there is none)
            boundToUnboundMap = mapper.mapBoundToUnbound

        bound2UnboundMapDict = {}
        for pp in pp_list_bound:
            for resBound in pp:
                chainBound = resBound.get_full_id()[2]  # str chainId
                if chainBound in skipBoundChainsIds:
                    continue
                resUnbound = boundToUnboundMap(resBound)
                # Bound residues without an unbound equivalent are left out of the dictionary
                if resUnbound is not None:
                    bound2UnboundMapDict[resBound] = resUnbound
        return bound2UnboundMapDict

    def fixHomooligomers(self, structureL, structureR, positiveContacts,
                         chainsInContactL, chainsInContactR):
        '''
        Runs a HomoOligomerFinder first over the ligand and then over the receptor so that, for each
        interacting pair (resL_1, resR_2), the equivalent pairs (resL_1', resR_2) and/or (resL_1, resR_2')
        are added, where resL_1' / resR_2' are equivalent residues in homooligomer chains.

        @param structureL: Bio.PDB.Structure. Structure of ligand
        @param structureR: Bio.PDB.Structure. Structure of receptor
        @param positiveContacts: [(ligandResId, receptorResId)]: full_ids of Bio.PDB.Residue
        @param chainsInContactL: ligand chains in contact. The incoming value is not read here; it is replaced
                                 by the one returned by the ligand HomoOligomerFinder
        @param chainsInContactR: receptor chains in contact. The incoming value is not read here; it is replaced
                                 by the one returned by the receptor HomoOligomerFinder
        @return positiveContacts, chainsInContactL, chainsInContactR. Updated with equivalent residues interactions added
        '''
        updatedChains = {}
        # Ligand first, then receptor: the receptor pass receives the contacts already updated by the ligand pass
        for chainType, structure in (("l", structureL), ("r", structureR)):
            peptides = self.ppb.build_peptides(structure, aa_only=False)
            finder = HomoOligomerFinder(peptides,
                                        positiveContacts,
                                        chainType=chainType)
            positiveContacts, updatedChains[chainType] = finder.update_interactions()
        return positiveContacts, updatedChains["l"], updatedChains["r"]

    def getPairsOfResiduesInContact(self, structureL, structureR):
        '''
        Computes which residues of the ligand are in contact with which residues of the receptor. Two residues
        are in contact when NeighborSearch reports them as neighbours within self.res2res_dist, considering
        only atoms whose name does not start with "H".

        @param structureL: Bio.PDB.Structure. Structure of ligand (bound state if available)
        @param structureR: Bio.PDB.Structure. Structure of receptor (bound state if available).
        @return positiveContacts:  Set {(full id of ligand residue, full id of receptor residue)}
        @return chainsNotContactL: Set { str(chainId structureL)} chains of the ligand with no residue in contact
        @return chainsNotContactR: Set { str(chainId structureR)} chains of the receptor with no residue in contact
        @raise NoValidPDBFile: if collecting the atoms of the first model of a structure raises IndexError
        '''

        def nonHydrogenAtoms(structure, errorMsg):
            # Atoms of the first model of the structure, hydrogens (by atom name) excluded
            try:
                return [
                    atom for atom in structure.child_list[0].get_atoms()
                    if not atom.name.startswith("H")
                ]
            except IndexError:
                raise NoValidPDBFile(errorMsg)

        atomsBothPartners = (
            nonHydrogenAtoms(structureL, "Problems parsing pdbFile 1") +
            nonHydrogenAtoms(structureR, "Problems parsing pdbFile 2"))

        neighbourPairs = NeighborSearch(atomsBothPartners).search_all(
            self.res2res_dist, level="R")
        ligandStructId = structureL.get_id()
        receptorStructId = structureR.get_id()

        positiveContacts = set()
        chainsInContactL = set()
        chainsInContactR = set()
        for resA, resB in neighbourPairs:
            fullIdA = resA.get_full_id()
            fullIdB = resB.get_full_id()
            # The first element of a full id is the structure id: use it to tell ligand from receptor
            if fullIdA[0] == ligandStructId and fullIdB[0] == receptorStructId:
                ligandFullId, receptorFullId = fullIdA, fullIdB
            elif fullIdA[0] == receptorStructId and fullIdB[0] == ligandStructId:
                ligandFullId, receptorFullId = fullIdB, fullIdA
            else:
                # Both residues belong to the same partner: not an inter-partner contact
                continue
            positiveContacts.add((ligandFullId, receptorFullId))
            chainsInContactL.add(ligandFullId[2])
            chainsInContactR.add(receptorFullId[2])

        if CONSIDER_HOMOOLIG_AS_POS:
            positiveContacts, chainsInContactL, chainsInContactR = self.fixHomooligomers(
                structureL, structureR, positiveContacts, chainsInContactL,
                chainsInContactR)

        allChainsL = {chain.get_id() for chain in structureL[0].get_list()}
        allChainsR = {chain.get_id() for chain in structureR[0].get_list()}
        return (positiveContacts,
                allChainsL.difference(chainsInContactL),
                allChainsR.difference(chainsInContactR))

    def contactMapOneComplex(self):
        '''
        Computes the contact map of a complex and writes it to self.outName
        (self.computedFeatsRootDir/common/contactMaps/prefix.cMap.tab). prefix is the common name of the ligand
        and receptor pdb files when they share it, e.g.
        1A2K_l_u.pdb and 1A2K_r_u.pdb  --> 1A2K.cMap.tab
        otherwise it is the ligand and receptor prefixes joined by "<->" (see __init__).

        Each row of the file describes one (ligand residue, receptor residue) pair; the last column is 1 for
        pairs in contact, -1 for pairs not in contact and nan when self.isForPrediction is True.
        If anything fails while writing, the partially written file is removed and the exception re-raised.

        @return 0 if the contact map file already exists; None after computing it
        @raise BadNumberOfResidues: if the number of mapped residues of a partner is not strictly between
                                    self.minNumResiduesPartner and self.maxNumResiduesPartner
        '''
        outName = self.outName
        print(outName)
        if os.path.isfile(outName):
            print('Already computed contact map')
            return 0

        structureL_u = self.parser.get_structure(self.prefixL + "_l_u.pdb",
                                                 self.lFname)
        structureR_u = self.parser.get_structure(self.prefixR + "_r_u.pdb",
                                                 self.rFname)
        structureL_b = None
        structureR_b = None
        if self.boundAvailable and not self.isForPrediction:
            try:
                lStructId_b = self.prefix + "_l_b.pdb"
                rStructId_b = self.prefix + "_r_b.pdb"
                lFname_b = os.path.join(
                    os.path.split(self.lFname)[0], lStructId_b)
                rFname_b = os.path.join(
                    os.path.split(self.rFname)[0], rStructId_b)
                structureL_b = self.parser.get_structure(lStructId_b, lFname_b)
                structureR_b = self.parser.get_structure(rStructId_b, rFname_b)
            except IOError:
                # Bound files cannot be read: only unbound pdbs are available. Reset both, since the
                # ligand may have been loaded before the receptor failed
                structureL_b = None
                structureR_b = None

        if self.isForPrediction:
            positiveContacts = None
            chainsNotContactR = set()
            chainsNotContactL = set()
        elif structureL_b is None or structureR_b is None:
            # No bound pair: compute contacts in the unbound structures
            positiveContacts, chainsNotContactL, chainsNotContactR = self.getPairsOfResiduesInContact(
                structureL_u, structureR_u)
        else:
            # Compute contacts in the bound structures
            positiveContacts, chainsNotContactL, chainsNotContactR = self.getPairsOfResiduesInContact(
                structureL_b, structureR_b)

        # Explicit comparison kept as in the original: the flag is defined outside this method
        if JUST_INTERACTING_CHAINS == False:
            chainsNotContactR = set()
            chainsNotContactL = set()

        rResDict = self.mapBoundToUnbound(structureR_u,
                                          structureR_b,
                                          skipBoundChainsIds=chainsNotContactR)
        lResDict = self.mapBoundToUnbound(structureL_u,
                                          structureL_b,
                                          skipBoundChainsIds=chainsNotContactL)
        nResiduesL = len(lResDict)
        nResiduesR = len(rResDict)
        if not (self.minNumResiduesPartner < nResiduesL <
                self.maxNumResiduesPartner):
            raise BadNumberOfResidues(nResiduesL, "1")
        if not (self.minNumResiduesPartner < nResiduesR <
                self.maxNumResiduesPartner):
            raise BadNumberOfResidues(nResiduesR, "2")

        def residueRows(resDict):
            # One (bound full id, chain id, residue id string, one-letter code) tuple per usable residue,
            # ordered by bound full id. A residue is skipped when three_to_one does not know its name
            # (KeyError) or when bound and unbound residue types differ.
            rows = []
            for resBound in sorted(resDict, key=lambda res: res.get_full_id()):
                resUnbound = resDict[resBound]
                try:
                    letter = three_to_one(resUnbound.resname)
                    if letter != three_to_one(resBound.resname):
                        continue
                except KeyError:
                    continue
                chainId, resId = resUnbound.get_full_id()[2:4]
                if chainId == " ":
                    chainId = "*"  # blank chain ids would break the space-separated output
                rows.append((resBound.get_full_id(), chainId,
                             self.makeStrResId(resId), letter))
            return rows

        outFile = open(outName, "w")
        try:
            # The with block closes the file before the except clause runs, so the partial file is
            # removed with no handle left open on it
            with outFile:
                outFile.write(
                    "chainIdL structResIdL resNameL chainIdR structResIdR resNameR categ\n"
                )
                ligandRows = residueRows(lResDict)
                # Receptor rows do not depend on the ligand residue: compute them once
                receptorRows = residueRows(rResDict)
                for fullIdL, chainIdL, resIdL, letraL in ligandRows:
                    for fullIdR, chainIdR, resIdR, letraR in receptorRows:
                        if self.isForPrediction:
                            categ = np.nan
                        elif (fullIdL, fullIdR) in positiveContacts:
                            categ = 1
                        else:
                            categ = -1
                        outFile.write("%s %s %s %s %s %s %s\n" %
                                      (chainIdL, resIdL, letraL, chainIdR,
                                       resIdR, letraR, categ))
        except (KeyboardInterrupt, Exception):
            print("Exception happend computing %s" % outName)
            tryToRemove(outName)
            raise

    def makeStrResId(self, resId):
        '''
        Builds a compact string from a residue id: the first element is dropped, the remaining ones are
        converted to str and concatenated, and surrounding whitespace is stripped.
        E.g. (" ", 25, "A") --> "25A" and (" ", 10, " ") --> "10"

        @param resId: sequence (e.g. the id tuple of a Bio.PDB.Residue)
        @return str
        '''
        return "".join(map(str, resId[1:])).strip()