Esempio n. 1
0
def get_msms_df(model,
                pdb_file,
                outfile=None,
                outdir=None,
                outext='_msms.df',
                force_rerun=False):
    """Run MSMS (using Biopython) on a Structure Model and tabulate residue depths.

    Biopython's ``ResidueDepth`` maps ``(chain_id, residue_id)`` keys to
    ``(res_depth, ca_depth)`` values; this function flattens that mapping into
    one row per residue and caches the table as a CSV file.

    Depths are in units of Angstroms (1 A = 10^-10 m = 0.1 nm).

    Args:
        model: Biopython Structure Model
        pdb_file: Path to the PDB file. It is passed to ``PDB.ResidueDepth``
            together with ``model`` and is also the input name from which
            the output file name is built.
        outfile: Passed to ``ssbio.utils.outfile_maker`` as ``outname``
        outdir: Passed to ``ssbio.utils.outfile_maker`` as ``outdir``
        outext: Passed to ``ssbio.utils.outfile_maker`` as ``outext``
        force_rerun: Passed to ``ssbio.utils.force_rerun`` as ``flag``; when
            that helper returns a falsy value the cached CSV is loaded
            instead of running MSMS again.

    Returns:
        Pandas DataFrame: ResidueDepth property_dict, reformatted into the
        columns ``chain``, ``resnum``, ``icode``, ``res_depth`` and
        ``ca_depth``. An empty DataFrame is returned (and nothing is written)
        when ``ResidueDepth`` raises an AssertionError.

    """
    # Create the output file name
    outfile = ssbio.utils.outfile_maker(inname=pdb_file,
                                        outname=outfile,
                                        outdir=outdir,
                                        outext=outext)

    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile):
        # Run MSMS with Biopython
        # NOTE(review): newer Biopython releases appear to take an optional
        # radius file, not the PDB path, as the second argument -- confirm
        # against the installed version.
        try:
            rd = PDB.ResidueDepth(model, pdb_file)
        except AssertionError:
            log.error('{}: unable to run MSMS'.format(pdb_file))
            return pd.DataFrame()

        # Reorganize the results into a csv file
        records = []
        for key in rd.property_keys:
            depths = rd.property_dict[key]
            chain_id, res_id = key[0], key[1]
            # res_id[0] (the hetero flag) is not exported; res_id[1] is the
            # residue number and res_id[2] the insertion code.
            records.append(
                (chain_id, res_id[1], res_id[2], depths[0], depths[1]))

        df = pd.DataFrame.from_records(
            records,
            columns=['chain', 'resnum', 'icode', 'res_depth', 'ca_depth'])
        df.to_csv(outfile)
    else:
        log.debug(
            '{}: already ran MSMS and force_rerun={}, loading results'.format(
                outfile, force_rerun))
        df = pd.read_csv(outfile, index_col=0)

    return df
Esempio n. 2
0
def get_msms_df(model, pdb_id, outfile=None, outdir=None, outext='_msms.df', force_rerun=False):
    """Run MSMS (using Biopython) on a Biopython Structure Model and tabulate residue depths.

    Depths are in units of Angstroms (1 A = 10^-10 m = 0.1 nm). Biopython's ``ResidueDepth`` provides a
    mapping of the form::

        {
            (chain_id, residue_id): (res_depth, ca_depth),
            ...
        }

    which is flattened here into one DataFrame row per residue and cached as a CSV file.

    Args:
        model: Biopython Structure Model
        pdb_id: Identifier used as the input name when building the output file name, and in the error log
            message. It is not passed to MSMS.
        outfile: Passed to ``ssbio.utils.outfile_maker`` as ``outname``
        outdir: Passed to ``ssbio.utils.outfile_maker`` as ``outdir``
        outext: Passed to ``ssbio.utils.outfile_maker`` as ``outext``
        force_rerun: Passed to ``ssbio.utils.force_rerun`` as ``flag``; when that helper returns a falsy value
            the cached CSV is loaded instead of running MSMS again.

    Returns:
        Pandas DataFrame: ResidueDepth property_dict, reformatted into the columns ``chain``, ``resnum``,
        ``icode``, ``res_depth`` and ``ca_depth``. An empty DataFrame is returned (and nothing is written) when
        ``ResidueDepth`` raises an AssertionError.

    """
    # XTODO: need to deal with temporary surface/vertex files in tmp directory when running on a large scale --
    # XTODO: will run into inode limits! Also, some valuable information is in these MSMS output files that we should save.

    # Create the output file name
    outfile = ssbio.utils.outfile_maker(inname=pdb_id, outname=outfile, outdir=outdir, outext=outext)

    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile):
        # Run MSMS with Biopython
        try:
            rd = PDB.ResidueDepth(model)
        except AssertionError:
            log.error('{}: unable to run MSMS'.format(pdb_id))
            return pd.DataFrame()

        # Reorganize the results into a csv file
        records = []
        for key in rd.property_keys:
            depths = rd.property_dict[key]
            chain_id, res_id = key[0], key[1]
            # res_id[0] (the hetero flag) is not exported; res_id[1] is the residue number and res_id[2] the
            # insertion code.
            records.append((chain_id, res_id[1], res_id[2], depths[0], depths[1]))

        df = pd.DataFrame.from_records(records, columns=['chain', 'resnum', 'icode', 'res_depth', 'ca_depth'])
        df.to_csv(outfile)
    else:
        log.debug('{}: already ran MSMS and force_rerun={}, loading results'.format(outfile, force_rerun))
        df = pd.read_csv(outfile, index_col=0)

    return df
Esempio n. 3
0
def cal_depth(s, aa_list_full):
    """Look up the MSMS residue depth for every entry of a residue list.

    Args:
        s: Biopython entity handed to ``PDB.ResidueDepth``.
        aa_list_full: Iterable of residues. Entries that cannot supply their
            ids (for example ``None`` placeholders) get a ``[None, None]``
            row instead of raising.

    Returns:
        ``numpy`` array with one row per entry of ``aa_list_full``: the value
        stored in ``ResidueDepth.property_dict`` for that residue, or
        ``[None, None]`` when no value is available.
    """
    # Residue depth: distance from the amino acid to the protein surface.
    depth = PDB.ResidueDepth(s)
    dep_dict = depth.property_dict

    dps = []
    for residue in aa_list_full:
        try:
            key = (residue.get_parent().get_id(), residue.get_id())
        except AttributeError:
            # Placeholder entry (e.g. None) or a residue without a parent.
            dps.append([None, None])
            continue
        # Missing keys fall back to the same placeholder row.
        dps.append(dep_dict.get(key) or [None, None])
    return np.array(dps)
Esempio n. 4
0
def cal(i):
    """Compute per-residue structural features for one PDB chain and save them.

    Relies on names defined elsewhere in the module: ``pdbid`` and
    ``pdbchain`` (indexed by ``i``), the parser ``p``, the residue-name table
    ``t_dic`` and the helper ``calha1``.

    The structure is read from ``'pdb_/pdb<id>.ent'`` and the resulting
    feature dict is written with ``np.save`` to
    ``'pdb_other_cb/<id><chain>_all.npy'``. The dict holds the keys ``dis``,
    ``angle``, ``mask``, ``ids``, ``seq``, ``dps``, ``hsea`` and ``hseb``.

    Args:
        i: Index into ``pdbid`` / ``pdbchain``.

    Returns:
        0 when the entry is skipped: the structure or chain cannot be read,
        the chain has no amino-acid residues, residue numbers do not strictly
        increase, or one of the ResidueDepth / HSExposure computations fails.
        ``None`` after the features were saved.
    """
    n_neighbors = 16     # neighbours kept per residue
    missing_dist = 100   # distance used when a CA-CA distance is unavailable

    print(pdbid[i], pdbchain[i])
    pdb_name = 'pdb_/pdb' + pdbid[i].lower() + '.ent'  # pdb name
    try:
        chain = p.get_structure("1", pdb_name)             # read pdb structure
        chain = chain[0][pdbchain[i]]                      # choose chain
        res_list = PDB.Selection.unfold_entities(chain, 'R')  # read residues
    except Exception:
        return 0

    aa_list = [res for res in res_list if PDB.is_aa(res)]
    if not aa_list:
        # No amino acids at all: skip instead of failing on aa_list[0].
        return 0

    # Pad numbering gaps with None so list position follows residue number.
    expected = aa_list[0].get_id()[1]
    aa_list_full = []
    for res in aa_list:
        resnum = res.get_id()[1]
        if resnum < expected:
            # Repeated or decreasing residue number: give up on this chain.
            return 0
        aa_list_full.extend([None] * (resnum - expected))
        aa_list_full.append(res)
        expected = resnum + 1

    def residue_props(prop_dict, width):
        """Return one property row per residue; gaps/misses become None rows."""
        rows = []
        for res in aa_list_full:
            value = None
            if res is not None:
                value = prop_dict.get((res.get_parent().get_id(), res.get_id()))
            rows.append(value or [None] * width)
        return np.array(rows)

    def backbone_atoms(atom_name):
        """Return the named atom of each residue, or None where unavailable."""
        atoms = []
        for res in aa_list_full:
            atom = None
            if res is not None:
                try:
                    atom = res[atom_name]
                except KeyError:
                    pass  # this residue lacks the atom
            atoms.append(atom)
        return atoms

    # Residue depth: distance from the amino acid to the protein surface.
    try:
        depth = PDB.ResidueDepth(chain)
    except Exception:
        return 0
    dps = residue_props(depth.property_dict, 2)

    try:
        hse_ca = PDB.HSExposureCA(chain)
    except Exception:
        return 0
    hse_a = residue_props(hse_ca.property_dict, 3)

    try:
        hse_cb = PDB.HSExposureCB(chain)
    except Exception:
        return 0
    hse_b = residue_props(hse_cb.property_dict, 3)

    # Sequence string; 'X' marks gaps and residue names missing from t_dic.
    seq_chars = []
    for res in aa_list_full:
        resname = res.get_resname() if res is not None else None
        seq_chars.append(t_dic[resname] if resname in t_dic else 'X')
    seq_list = ''.join(seq_chars)

    ca_list = backbone_atoms('CA')
    cb_list = backbone_atoms('CB')
    n_list = backbone_atoms('N')
    c_list = backbone_atoms('C')
    n_res = len(ca_list)

    # Three angles describing the relative position of two residues: for the
    # pair (j, k) the angles X-CA_j-CA_k with X being CB_j, N_j and C_j.
    angle = []
    for ca_j, cb_j, n_j, c_j in zip(ca_list, cb_list, n_list, c_list):
        if ca_j is None:
            angle.append([[None, None, None] for _ in range(n_res)])
            continue
        ca1 = ca_j.get_vector()
        if cb_j is not None:
            cb = cb_j.get_vector()
        elif c_j is not None and n_j is not None:
            # No CB atom: use the point calha1 derives from N, C and CA
            # (calha1 is defined elsewhere; assumed to be a pure function).
            cb = PDB.vectors.Vector(calha1(n_j.get_vector().get_array(),
                                           c_j.get_vector().get_array(),
                                           ca1.get_array()))
        else:
            cb = None
        refs = (cb,
                n_j.get_vector() if n_j is not None else None,
                c_j.get_vector() if c_j is not None else None)

        row = []
        for ca_k in ca_list:
            if ca_k is None:
                row.append([None, None, None])
                continue
            ca2 = ca_k.get_vector()
            row.append([None if ref is None
                        else PDB.vectors.calc_angle(ref, ca1, ca2)
                        for ref in refs])
        angle.append(row)

    # Six angles per pair: the three seen from j followed by the three from k.
    angle_d = np.array([[angle[j][k] + angle[k][j] for k in range(n_res)]
                        for j in range(n_res)])

    # CA-CA distance matrix (None where either CA is missing).
    ca_dist = np.array([
        ca_j - ca_k if ca_j is not None and ca_k is not None else None
        for ca_j in ca_list
        for ca_k in ca_list
    ]).reshape(n_res, n_res)

    # Whether each position has a CA atom.
    mask = [1 if atom is not None else 0 for atom in ca_list]

    # Distances that could not be computed are set to 100. The comparison
    # must stay `== None`: it is an element-wise test on the array.
    ca_dist[ca_dist == None] = missing_dist  # noqa: E711

    ca_dist_cs = []
    angle_cs = []
    num_cs = []
    for j in range(len(ca_dist)):
        row = ca_dist[j]
        # Skip the first entry of the sorted order (presumably the residue
        # itself at distance 0) and keep the next n_neighbors entries.
        nearest = row.argsort()[1:n_neighbors + 1]
        ca_dist_cs.append(row[nearest])
        angle_cs.append(angle_d[j][nearest])
        num_cs.append(nearest)

    dic_r = {
        'dis': ca_dist_cs,
        'angle': angle_cs,
        'mask': mask,
        'ids': num_cs,
        'seq': seq_list,
        'dps': dps,
        'hsea': hse_a,
        'hseb': hse_b,
    }

    out_name = 'pdb_other_cb/' + pdbid[i].lower() + pdbchain[i] + '_all.npy'
    # A dict is stored as a pickled object array; loading it back needs
    # np.load(..., allow_pickle=True).
    np.save(out_name, dic_r)
Esempio n. 5
0
# Residue names of the amino-acid residues collected earlier in the script.
res_name = [residue.get_resname() for residue in aa_list]

# Half-sphere exposure computed around the CA atoms.
HSEA = PDB.HSExposureCA(s)
HSEA_dict, HSEA_keys, HSEA_list = (
    HSEA.property_dict, HSEA.property_keys, HSEA.property_list)

# Half-sphere exposure computed with the CB atoms.
HSEB = PDB.HSExposureCB(s)
HSEB_dict, HSEB_keys, HSEB_list = (
    HSEB.property_dict, HSEB.property_keys, HSEB.property_list)

# Residue depth (runs MSMS through Biopython).
depth = PDB.ResidueDepth(s)
dep_dict, dep_keys, dep_list = (
    depth.property_dict, depth.property_keys, depth.property_list)

# DSSP run against the given PDB file.
dssp = PDB.DSSP(s, "3skpFH.pdb")
dssp_dict = dssp.property_dict

# For every CA atom, record what the neighbour search returns within a
# radius of 8 around it, keyed by (chain id, residue id).
nb_dict = {}
nb = PDB.NeighborSearch(ca_list)
for a in ca_list:
    aa = a.get_parent()
    aa_id = (aa.get_parent().get_id(), aa.get_id())
    t = nb.search(a.get_coord(), 8)
    nb_dict[aa_id] = t
Esempio n. 6
0
def get_dis(name):
    """Compute per-residue structural features for the first model of a PDB file.

    Relies on names defined elsewhere in the module: ``PDBParser``, ``PDB``,
    the residue-name table ``t_dic`` and the helper ``calha1``.

    Args:
        name: Path of the PDB file, passed to ``PDBParser.get_structure``.

    Returns:
        An 8-tuple ``(seq_list, num_cs, mask, ca_dist_cs, angle_cs, dps,
        hse_a, hse_b)``:

        * ``seq_list`` -- sequence string, ``'X'`` for gaps and residue names
          missing from ``t_dic``;
        * ``num_cs`` -- per residue, the indices of its selected neighbours;
        * ``mask`` -- 1 where the position has a CA atom, else 0;
        * ``ca_dist_cs`` -- per residue, CA-CA distances to those neighbours;
        * ``angle_cs`` -- per residue, the six angles to those neighbours;
        * ``dps`` -- ResidueDepth values per residue;
        * ``hse_a`` / ``hse_b`` -- HSExposureCA / HSExposureCB values.

        A tuple of eight ``None`` values is returned when the file cannot be
        parsed, it has no amino-acid residues, residue numbers do not
        strictly increase, or one of the ResidueDepth / HSExposure
        computations fails.
    """
    failure = (None,) * 8
    n_neighbors = 16     # neighbours kept per residue
    missing_dist = 100   # distance used when a CA-CA distance is unavailable

    parser = PDBParser(PERMISSIVE=1)
    try:
        model = parser.get_structure("X", name)[0]
    except Exception:
        return failure

    aa_list = [res for res in PDB.Selection.unfold_entities(model, 'R')
               if PDB.is_aa(res)]
    if not aa_list:
        # No amino acids at all: report failure instead of an IndexError.
        return failure

    # Pad numbering gaps with None so list position follows residue number.
    expected = aa_list[0].get_id()[1]
    aa_list_full = []
    for res in aa_list:
        resnum = res.get_id()[1]
        if resnum < expected:
            # Repeated or decreasing residue number: give up on this file.
            return failure
        aa_list_full.extend([None] * (resnum - expected))
        aa_list_full.append(res)
        expected = resnum + 1

    def residue_props(prop_dict, width):
        """Return one property row per residue; gaps/misses become None rows."""
        rows = []
        for res in aa_list_full:
            value = None
            if res is not None:
                value = prop_dict.get((res.get_parent().get_id(), res.get_id()))
            rows.append(value or [None] * width)
        return np.array(rows)

    def backbone_atoms(atom_name):
        """Return the named atom of each residue, or None where unavailable."""
        atoms = []
        for res in aa_list_full:
            atom = None
            if res is not None:
                try:
                    atom = res[atom_name]
                except KeyError:
                    pass  # this residue lacks the atom
            atoms.append(atom)
        return atoms

    try:
        depth = PDB.ResidueDepth(model)
    except Exception:
        return failure
    dps = residue_props(depth.property_dict, 2)

    try:
        hse_ca = PDB.HSExposureCA(model)
    except Exception:
        return failure
    hse_a = residue_props(hse_ca.property_dict, 3)

    try:
        hse_cb = PDB.HSExposureCB(model)
    except Exception:
        return failure
    hse_b = residue_props(hse_cb.property_dict, 3)

    # Sequence string; 'X' marks gaps and residue names missing from t_dic.
    seq_chars = []
    for res in aa_list_full:
        resname = res.get_resname() if res is not None else None
        seq_chars.append(t_dic[resname] if resname in t_dic else 'X')
    seq_list = ''.join(seq_chars)

    ca_list = backbone_atoms('CA')
    cb_list = backbone_atoms('CB')
    n_list = backbone_atoms('N')
    c_list = backbone_atoms('C')
    n_res = len(ca_list)

    # Three angles per ordered pair (j, k): X-CA_j-CA_k with X being CB_j,
    # N_j and C_j.
    angle = []
    for ca_j, cb_j, n_j, c_j in zip(ca_list, cb_list, n_list, c_list):
        if ca_j is None:
            angle.append([[None, None, None] for _ in range(n_res)])
            continue
        ca1 = ca_j.get_vector()
        if cb_j is not None:
            cb = cb_j.get_vector()
        elif c_j is not None and n_j is not None:
            # No CB atom: use the point calha1 derives from N, C and CA
            # (calha1 is defined elsewhere; assumed to be a pure function).
            cb = PDB.vectors.Vector(calha1(n_j.get_vector().get_array(),
                                           c_j.get_vector().get_array(),
                                           ca1.get_array()))
        else:
            cb = None
        refs = (cb,
                n_j.get_vector() if n_j is not None else None,
                c_j.get_vector() if c_j is not None else None)

        row = []
        for ca_k in ca_list:
            if ca_k is None:
                row.append([None, None, None])
                continue
            ca2 = ca_k.get_vector()
            row.append([None if ref is None
                        else PDB.vectors.calc_angle(ref, ca1, ca2)
                        for ref in refs])
        angle.append(row)

    # Six angles per pair: the three seen from j followed by the three from k.
    angle_d = np.array([[angle[j][k] + angle[k][j] for k in range(n_res)]
                        for j in range(n_res)])

    # CA-CA distance matrix (None where either CA is missing).
    ca_dist = np.array([
        ca_j - ca_k if ca_j is not None and ca_k is not None else None
        for ca_j in ca_list
        for ca_k in ca_list
    ]).reshape(n_res, n_res)

    mask = [1 if atom is not None else 0 for atom in ca_list]

    # Unavailable distances are replaced by a large constant. The comparison
    # must stay `== None`: it is an element-wise test on the array.
    ca_dist[ca_dist == None] = missing_dist  # noqa: E711

    ca_dist_cs = []
    angle_cs = []
    num_cs = []
    for j in range(len(ca_dist)):
        row = ca_dist[j]
        # Skip the first entry of the sorted order (presumably the residue
        # itself at distance 0) and keep the next n_neighbors entries.
        nearest = row.argsort()[1:n_neighbors + 1]
        ca_dist_cs.append(row[nearest])
        angle_cs.append(angle_d[j][nearest])
        num_cs.append(nearest)

    return seq_list, num_cs, mask, ca_dist_cs, angle_cs, dps, hse_a, hse_b