-
Notifications
You must be signed in to change notification settings - Fork 0
/
density.py
107 lines (93 loc) · 2.89 KB
/
density.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python2.7
"""
Run this script from within PyMOL.
Compute the atomic packing density around interior (solvent-inaccessible) atoms.
"""
from pymol import cmd, stored
import scipy as sp
from itertools import product
import os
import pandas as pd
# Presumably an intended worker count -- it is not referenced anywhere in the
# visible code (TODO confirm it can be dropped).
CORES = 4
# Directory that is scanned for *.pdb files; the commented-out line is an
# alternative data set.
#PATH = ".../Assignment 2/supplement02"
PATH = ".../project_3_pdbs/"
# Largest radius that is evaluated; euclid_step() ignores atom pairs whose
# distance is not below this value.  Presumably in Angstrom -- TODO confirm.
THRESHOLD = 13
# The 25 evenly spaced radii, from 2 up to and including THRESHOLD, at which
# counts and densities are reported.
RADS = sp.linspace(2, THRESHOLD, 25)
def packing(pdb):
    """Derive per-radius atom counts and an area-normalised density as pd.Series.

    The current PyMOL session is cleared and ``pdb`` is loaded into it.
    Solvent and hydrogens are removed, the per-atom solvent-accessible
    surface is written to the B-factor column, and every atom with a value
    of zero there is selected as ``interior``.

    Going from the largest radius in ``RADS`` to the smallest, the structure
    is trimmed to the atoms within that radius of an interior atom.  For each
    radius the number of such atoms and the surface area reported by
    ``cmd.get_area`` for what is left are recorded.

    NOTE: this is destructive for the PyMOL session -- all objects are
    deleted up front and atoms are removed while iterating.

    :param pdb: path of the structure file handed to ``cmd.load``.
    :return: pd.Series holding, for every radius ``r`` in ``RADS``, the
        entries ``"<r>_rawcount"``, ``"<r>_cv density"`` (count divided by
        area) and ``"<r>_volume estimate"`` (the area).
    """
    cmd.delete('all')
    cmd.load(pdb)
    cmd.remove('solvent')
    # Only heavy atoms
    cmd.remove('hydro')
    # Compute SAS per atom and store it in the B-factor column
    cmd.set('dot_solvent', 1)
    cmd.get_area('all', load_b=1)
    # Interior atoms are those without any solvent-accessible surface
    cmd.select('interior', 'b = 0')
    counts = pd.Series(0, index=RADS)
    # Areas are floats: start from a float Series so that assigning them does
    # not rely on pandas silently upcasting an integer Series.
    vest = pd.Series(0.0, index=RADS)
    # From biggest to smallest radius: every smaller neighbourhood is a
    # subset of the bigger one, so trimming never removes atoms needed later.
    for r in RADS[::-1]:
        # Counting.  `expand` grows the selection by distance; the previously
        # used `extend` walks along bonds and does not take a distance.
        counts.loc[r] = cmd.select('extended', 'interior expand {}'.format(r))
        cmd.remove('not extended')
        # Surface area of the remaining atoms (dot_solvent is still 1, so
        # this is the solvent-accessible rather than the molecular surface)
        vest.loc[r] = cmd.get_area('all')
    # Results
    cvdens = counts / vest
    counts.index = ["{}_rawcount".format(i) for i in counts.index]
    vest.index = ["{}_volume estimate".format(i) for i in vest.index]
    cvdens.index = ["{}_cv density".format(i) for i in cvdens.index]
    return pd.concat([counts, cvdens, vest])
def euclid_step(a, b):
    """Return the smallest radius in RADS that reaches from ``a`` to ``b``.

    ``a`` and ``b`` are indexable coordinates of which the first three
    components are used.  If their Euclidean distance is not below
    THRESHOLD, None is returned instead.
    """
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    dz = a[2] - b[2]
    dist = sp.sqrt(dx ** 2 + dy ** 2 + dz ** 2)
    # Pairs that are too far apart do not fall into any radius bin
    if not dist < THRESHOLD:
        return None
    # RADS is ascending, so the first radius reaching dist is the tightest one
    return next((rad for rad in RADS if rad >= dist), None)
def slowpacking(pdb):
    """Derive mean packing density of pdb as pd.Series.

    The current PyMOL session is cleared and ``pdb`` is loaded into it.
    Solvent and hydrogens are removed, the per-atom solvent-accessible
    surface is written to the B-factor column, and every atom with a value
    of zero there is selected as ``interior``.

    Every (interior atom, any atom) pair is then binned by distance with
    ``euclid_step`` and the bins are accumulated, giving for each radius in
    ``RADS`` the number of pairs no further apart than that radius.  Dividing
    by the number of interior atoms and by the sphere volume of the radius
    yields a density; the centre atom itself is excluded from it.

    The pairwise loop is quadratic in the number of atoms, hence the name.

    :param pdb: path of the structure file handed to ``cmd.load``.
    :return: pd.Series holding, for every radius ``r`` in ``RADS``, the
        entries ``"<r>_correctcount"`` (cumulative pair count, centre atoms
        included) and ``"<r>_density"``.
    """
    cmd.delete('all')
    cmd.load(pdb)
    cmd.remove('solvent')
    # Only heavy atoms
    cmd.remove('hydro')
    # Compute SAS per atom and store it in the B-factor column
    cmd.set('dot_solvent', 1)
    cmd.get_area('all', load_b=1)
    # Number of interior atoms, i.e. atoms without solvent-accessible surface
    N = float(cmd.select('interior', 'b = 0'))
    internal_coords = [at.coord for at in cmd.get_model('interior').atom]
    all_coords = [at.coord for at in cmd.get_model('all').atom]
    # Count.  Tally in a plain dict: updating a pandas Series once per atom
    # pair is needlessly expensive.
    tally = dict.fromkeys(RADS, 0)
    for a, b in product(internal_coords, all_coords):
        es = euclid_step(a, b)
        if es is not None:
            tally[es] += 1
    # Pairs binned at a small radius also lie within every bigger radius
    counts = pd.Series([tally[r] for r in RADS], index=RADS).cumsum()
    # Mean per center atom
    meancounts = counts / N
    # Normalize to density
    volumina = pd.Series(4 / 3.0 * sp.pi * (RADS ** 3), index=RADS)
    # Correct for center: each interior atom is also part of 'all' and pairs
    # with itself at distance 0, so it is counted once at every radius.
    # Reusing `volumina` keeps the correction on the same sphere volume as
    # the normalisation; a literal `4/3` is integer division under Python 2.
    density = (meancounts - 1.0) / volumina
    # Results
    counts.index = ["{}_correctcount".format(i) for i in counts.index]
    density.index = ["{}_density".format(i) for i in density.index]
    return pd.concat([counts, density])
# Work inside the data directory so the bare file names can be loaded directly
cmd.cd(PATH)
# Every *.pdb file of the directory becomes one row of the result table
pdbs = [entry for entry in os.listdir(".") if entry.endswith(".pdb")]
result = [slowpacking(entry) for entry in pdbs]
# One row per structure, one column per entry of the slowpacking() Series
result = pd.DataFrame(result, index=pdbs)
result.to_csv("packing.csv")