"""
=========================
PDB file input and output
=========================
Reading and writing files from the PDB file format.
PDB files
---------
The PDB file format is sometimes used for reading and writing information about
tractography results. The *nominal* PDB file format specification is as
follows, but note that some of these things are not implemented in PDB version
3. For example, there are no algorithms to speak of, so that whole bit is
completely ignored.
The file-format is organized as a semi-hierarchical data-base, according to
the following specification:
[ header size] - int
-- HEADER FOLLOWS --
[4x4 xform matrix ] - 16 doubles
[ number of pathway statistics ] - int
for each statistic:
[ currently unused ] - bool
[ is stat stored per point, or aggregate per path? ] - bool
[ currently unused ] - bool
[ name of the statistic ] - char[255]
[ currently unused ] - char[255]
[ unique ID - unique identifier for this stat across files ] - int
** The algorithms bit is not really working as advertised: **
[ number of algorithms ] - int
for each algorithm:
[ algorithm name ] - char[255]
[ comments about the algorithm ] - char[255]
[ unique ID - unique identifier for this algorithm, across files ] - int
[ version number ] - int
-- HEADER ENDS --
[ number of pathways ] - int
[ pts per fiber ] - number of pathways integers
for each pathway:
[ header size ] - int
-- PATHWAY HEADER FOLLOWS --
** The following are not actually encoded in the fiber header and are
currently set in an arbitrary fashion: **
[ number of points ] - int
[ algorithm ID ] - int
[ seed point index ] - int
for each statistic:
[ precomputed statistical value ] - double
-- PATHWAY HEADER ENDS --
for each point:
[ position of the point ] - 3 doubles (ALREADY TRANSFORMED from
voxel space!)
for each statistic:
IF computed per point (see statistics header, second bool field):
for each point:
[ statistical value for this point ] - double
"""
# Imports from the standard lib:
import struct
import os
import inspect
import warnings
import urllib
import zipfile

# Third-party imports:
import numpy as np
import scipy.io as sio
import scipy.stats as stats
import nibabel as ni
import nibabel.trackvis as tv

# Imports needed by the code below, but missing from the original file. The
# exact module paths are assumptions about the package layout (the docstrings
# below refer to osmosis.fibers.FiberGroup); adjust them if the package is
# organized differently:
import osmosis.fibers as ozf  # provides Fiber and FiberGroup
from osmosis.viz.mpl import ProgressBar  # assumed location of ProgressBar
# This one's a global used in both packing and unpacking the data
_fmt_dict = {'int':['=i', 4],
'double':['=d', 8],
'char':['=c', 1],
'bool':['=?', 1],
#'uint':['=I', 4],
}
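
# A small sanity check on the table above (illustrative only, not part of the
# original module): the byte sizes stored here agree with what the struct
# module reports for the corresponding format codes, e.g.:
#
#     assert all(struct.calcsize(fmt) == sz for fmt, sz in _fmt_dict.values())
#
# '=i' -> 4 bytes, '=d' -> 8 bytes, '=c' -> 1 byte, '=?' -> 1 byte.
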
def _unpacker(file_read, idx, obj_to_read, fmt='int'):
"""
Helper function to unpack binary data from files with the struct library.
Relies on http://docs.python.org/library/struct.html
Parameters
----------
    file_read: The output of file.read() from a file object
    idx: An index into file_read at which to start reading
    obj_to_read: How many objects to read
    fmt: A key into _fmt_dict above ('int', 'double', 'char' or 'bool'),
        telling us what to read from there
"""
# For each one, this is [fmt_string, size]
fmt_str = _fmt_dict[fmt][0]
fmt_sz = _fmt_dict[fmt][1]
out = np.array([struct.unpack(fmt_str,
file_read[idx + fmt_sz * j:idx + fmt_sz + fmt_sz * j])[0]
for j in range(obj_to_read)])
idx += obj_to_read * fmt_sz
return out, idx
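
# A quick sketch of how _unpacker gets used below (made-up values): pack two
# ints and a double into a buffer, then walk it with an updatable index, the
# same way `read` walks the header of a .pdb file:
#
#     buf = struct.pack('=iid', 128, 2, 1.5)
#     ints, idx = _unpacker(buf, 0, 2)              # array([128, 2]), idx == 8
#     dbls, idx = _unpacker(buf, idx, 1, 'double')  # array([ 1.5]),   idx == 16
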
def _packer(file_write, vals, fmt='int'):
"""
Helper function to pack binary data to files, using the struct library:
Relies on http://docs.python.org/library/struct.html
"""
fmt_str = _fmt_dict[fmt][0]
if np.iterable(vals):
for pack_this in vals:
s = struct.pack(fmt_str, pack_this)
file_write.write(s)
else:
s = struct.pack(fmt_str, vals)
file_write.write(s)
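
# Round-trip sketch (illustrative only): _packer writes to anything with a
# .write() method, so an in-memory buffer can be used to check that _packer
# and _unpacker invert each other:
#
#     import io
#     buf = io.BytesIO()
#     _packer(buf, [3, 7])             # two ints
#     _packer(buf, 2.5, 'double')      # one double
#     ints, idx = _unpacker(buf.getvalue(), 0, 2)
#     dbls, idx = _unpacker(buf.getvalue(), idx, 1, 'double')
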
def _word_maker(arr):
"""
    Helper function to make a string out of the pdb stats header "name"
    variables.
"""
make_a_word = []
for this in arr:
if this: # The sign that you reached the end of the word is an empty
# char
make_a_word.append(this)
else:
break
return ''.join(make_a_word)
def _char_list_maker(name):
"""
    Helper function that does essentially the opposite of _word_maker. Takes a
    string and makes it into a 255-long list of characters with the name of a
    stat, followed by a null character ('\x00') and then 'g' for the rest of
    the 255.
"""
l = list(name)
l.append('\x00') # The null character
while len(l)<255:
l.append('g')
return l
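
# For example (illustrative): _char_list_maker('FA') returns the 255-element
# list ['F', 'A', '\x00', 'g', 'g', ..., 'g']. The null character is what
# marks the end of the word when the name is read back from a file and handed
# to _word_maker above.
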
def _stat_hdr_set(fwrite, stat, uid):
"""
Helper function for writing stuff into stats header portion of pdb files
"""
# Name of the stat:
char_list = _char_list_maker(stat)
_packer(fwrite, char_list, 'char')
_packer(fwrite, char_list, 'char') # Twice for some reason
    _packer(fwrite, ['g','g'], 'char')  # Add this, so that the uid ends up
                                        # "word-aligned".
_packer(fwrite, uid) # These might get reordered upon
# resaving on different platforms, because
# dict keys come in no particular order...
def read(file_name, verbose=True):
"""
Read the definition of a fiber-group from a .pdb file
Parameters
----------
    file_name: str
        Full path to the .pdb file
    verbose: bool, optional
        Whether to report on progress while reading. Default: True.
Returns
-------
    dict
        A dictionary with the fibers, their statistics, the name and the
        affine read from the file.

    Note
    ----
    This reads version 2 and version 3 PDB files.
"""
# Read the file as binary info:
    f_obj = open(file_name, 'rb')
f_read = f_obj.read()
f_obj.close()
# This is an updatable index into this read:
idx = 0
# First part is an int encoding the offset to the fiber part:
offset, idx = _unpacker(f_read, idx, 1)
# Next bit are doubles, encoding the xform (4 by 4 = 16 of them):
xform, idx = _unpacker(f_read, idx, 16, 'double')
xform = np.reshape(xform, (4, 4))
# Next is an int encoding the number of stats:
numstats, idx = _unpacker(f_read, idx, 1)
# The stats header is a dict with lists holding the stat per
stats_header = dict(luminance_encoding=[], # int => bool
computed_per_point=[], # int => bool
viewable=[], # int => bool
agg_name=[], # char array => string
local_name=[], # char array => string
uid=[] # int
)
# Read the stats header:
counter = 0
while counter < numstats:
counter += 1
for k in ["luminance_encoding",
"computed_per_point",
"viewable"]:
this, idx = _unpacker(f_read, idx, 1)
            stats_header[k].append(bool(this))
for k in ["agg_name", "local_name"]:
this, idx = _unpacker(f_read, idx, 255, 'char')
stats_header[k].append(_word_maker(this))
# Must have integer reads be word aligned (?):
idx += 2
this, idx = _unpacker(f_read, idx, 1)
stats_header["uid"].append(this)
# We skip the whole bit with the algorithms and go straight to the version
# number, which is one int length before the fibers:
idx = offset - 4
version, idx = _unpacker(f_read, idx, 1)
if int(version) < 2:
raise ValueError("Can only read PDB version 2 or version 3 files")
elif verbose:
print("Loading a PDB version %s file from: %s"%(int(version), file_name))
if int(version) == 2:
idx = offset
# How many fibers?
numpaths, idx = _unpacker(f_read, idx, 1)
if int(version) == 2:
pts = []
if verbose:
prog_bar = ProgressBar(numpaths[0])
f_name = inspect.stack()[0][3]
f_stats = []
n_stats = []
for p_idx in range(numpaths):
f_stats_dict = {}
n_stats_dict = {}
# Keep track of where you are right now
ppos = idx
path_offset, idx = _unpacker(f_read, idx, 1)
n_nodes, idx = _unpacker(f_read, idx, 1)
# As far as I can tell the following two don't matter much:
algo_type, idx = _unpacker(f_read, idx, 1)
seed_pt_idx, idx = _unpacker(f_read, idx, 1)
# Read out the per-path stats:
for stat_idx in range(numstats):
per_fiber_stat, idx = _unpacker(f_read, idx, 1, 'double')
f_stats_dict[stats_header["local_name"][stat_idx]] = \
per_fiber_stat
f_stats.append(f_stats_dict)
# Skip forward to where the paths themselves are:
            idx = ppos + path_offset  # the stored header size is taken to
                                      # include the size field itself
# Read the nodes:
pathways, idx = _unpacker(f_read, idx, n_nodes*3, 'double')
pts.append(np.reshape(pathways, (n_nodes, 3)).T)
for stat_idx in range(numstats):
if stats_header["computed_per_point"][stat_idx]:
name = stats_header["local_name"][stat_idx]
n_stats_dict[name], idx = _unpacker(f_read, idx, n_nodes,
'double')
n_stats.append(n_stats_dict)
fibers = []
# Initialize all the fibers:
for p_idx in range(numpaths):
this_fstats_dict = f_stats[p_idx]
f_stat_k = this_fstats_dict.keys()
f_stat_v = [this_fstats_dict[k] for k in f_stat_k]
this_nstats_dict = n_stats[p_idx]
n_stats_k = this_nstats_dict.keys()
n_stats_v = [this_nstats_dict[k] for k in n_stats_k]
            fibers.append(ozf.Fiber(pts[p_idx],
                                    xform,
                                    fiber_stats=dict(zip(f_stat_k, f_stat_v)),
                                    node_stats=dict(zip(n_stats_k, n_stats_v))))
        # Wrap the result in the same dict structure that the version 3 branch
        # below uses, so that the shared code at the end of this function also
        # works for version 2 files:
        fiber_group = dict(fibers=fibers, fiber_stats=[], node_stats=[])
elif int(version) == 3:
# The next few bytes encode the number of points in each fiber:
pts_per_fiber, idx = _unpacker(f_read, idx, numpaths)
total_pts = np.sum(pts_per_fiber)
# Next we have the xyz coords of the nodes in all fibers:
fiber_pts, idx = _unpacker(f_read, idx, total_pts * 3, 'double')
# We extract the information on a fiber-by-fiber basis
pts_read = 0
pts = []
if verbose:
prog_bar = ProgressBar(numpaths[0])
f_name = inspect.stack()[0][3]
for p_idx in range(numpaths):
n_nodes = pts_per_fiber[p_idx]
pts.append(np.reshape(
fiber_pts[pts_read * 3:(pts_read + n_nodes) * 3],
(n_nodes, 3)).T)
pts_read += n_nodes
if verbose:
prog_bar.animate(p_idx, f_name=f_name)
f_stats_dict = {}
for stat_idx in range(numstats):
per_fiber_stat, idx = _unpacker(f_read, idx, numpaths, 'double')
# This is a fiber-stat only if it's not computed per point:
if not stats_header["computed_per_point"][stat_idx]:
f_stats_dict[stats_header["local_name"][stat_idx]] =\
per_fiber_stat
per_point_stat = []
n_stats_dict = {}
for stat_idx in range(numstats):
pts_read = 0
            # If it is computed per point, it's a node-stat:
if stats_header["computed_per_point"][stat_idx]:
name = stats_header["local_name"][stat_idx]
n_stats_dict[name] = []
per_point_stat, idx = _unpacker(f_read, idx, total_pts, 'double')
for p_idx in range(numpaths):
n_stats_dict[name].append(
per_point_stat[pts_read:pts_read + pts_per_fiber[p_idx]])
pts_read += pts_per_fiber[p_idx]
else:
per_point_stat.append([])
fiber_group = dict(fibers=[], fiber_stats=[], node_stats=[])
# Initialize all the fibers:
for p_idx in range(numpaths):
f_stat_k = f_stats_dict.keys()
f_stat_v = [f_stats_dict[k][p_idx] for k in f_stat_k]
n_stats_k = n_stats_dict.keys()
n_stats_v = [n_stats_dict[k][p_idx] for k in n_stats_k]
            fiber_group["fibers"].append(ozf.Fiber(pts[p_idx],
                                         xform,
                                         fiber_stats=dict(zip(f_stat_k, f_stat_v)),
                                         node_stats=dict(zip(n_stats_k, n_stats_v))))
if verbose:
print("Done reading from file")
name = os.path.split(file_name)[-1].split('.')[0]
fiber_group['name'] = name
fiber_group['affine'] = xform
return fiber_group
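
# Typical usage of `read` (a sketch; the file name is a placeholder):
#
#     fg_dict = read('some_fibers.pdb', verbose=False)
#     fg_dict['name']    # file name without the extension
#     fg_dict['affine']  # the 4x4 transform stored in the header
#     fg_dict['fibers']  # list of Fiber objects
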
def write(fg, file_name, verbose=True, affine=None):
"""
    Create a pdb file from an osmosis.fibers.FiberGroup class instance.

    Parameters
    ----------
    fg: a FiberGroup object
    file_name: str
        Full path to the pdb file to be saved.
    verbose: bool, optional
        Whether to report on progress. Default: True.
    affine: 4 by 4 array, optional
        A transform to write into the file header. Per default, the affine of
        the FiberGroup is used (falling back to the identity if that is None).
"""
    fwrite = open(file_name, 'wb')
    # The total number of stats counts both node-stats and fiber-stats:
    n_stats = len(fg[0].fiber_stats) + len(fg[0].node_stats)
    stats_hdr_sz = (4 * _fmt_dict['int'][1] + 2 * _fmt_dict['char'][1] * 255 + 2)
    # This is the 'offset' to the beginning of the fiber-data. Note that we
    # are just skipping the whole algorithms thing, since that seems to be
    # unused in the mrDiffusion implementation of this file-format, from
    # which this was adapted:
    hdr_sz = (4 * _fmt_dict['int'][1] +     # ints: hdr_sz itself, n_stats,
                                            # n_algs (always 0), version
              16 * _fmt_dict['double'][1] +  # doubles: the 4 by 4 affine
              n_stats * stats_hdr_sz)        # the stats part of the header
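    # Worked out in bytes, for reference: each stat header is 4 ints (16) +
    # 2 * 255 chars (510) + 2 padding chars = 528 bytes, which is exactly what
    # the three flag ints plus _stat_hdr_set write out per stat below; the
    # fixed part is 4 ints (16) + 16 doubles (128) = 144 bytes, so
    # hdr_sz == 144 + 528 * n_stats.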
_packer(fwrite, hdr_sz)
if affine is None:
if fg.affine is None:
affine = tuple(np.eye(4).ravel().squeeze())
else:
affine = tuple(np.array(fg.affine).ravel().squeeze())
else:
affine = tuple(np.array(affine).ravel().squeeze())
_packer(fwrite, affine, 'double')
_packer(fwrite, n_stats)
    # We are going to assume that the fibers are homogeneous with respect to
    # the following properties. XXX Should we check that when making
    # FiberGroup instances?
uid = 0
for f_stat in fg[0].fiber_stats:
_packer(fwrite, True) # currently unused
_packer(fwrite, False) # Is this per-point?
_packer(fwrite, True) # currently unused
_stat_hdr_set(fwrite, f_stat, uid)
uid += 1 # We keep tracking that across fiber and node stats
for n_stat in fg[0].node_stats:
# Three True bools for this one:
for x in range(3):
_packer(fwrite, True)
_stat_hdr_set(fwrite, n_stat, uid)
uid += 1
_packer(fwrite, 0) # Number of algorithms - set to 0 always
fwrite.seek(hdr_sz - _fmt_dict['int'][1])
# This is the PDB file version:
_packer(fwrite, 3)
_packer(fwrite, fg.n_fibers)
for fib in fg.fibers:
# How many coords in each fiber:
_packer(fwrite, fib.coords.shape[-1])
# x,y,z coords in each fiber:
for fib in fg.fibers:
_packer(fwrite, fib.coords.T.ravel(), 'double')
    for stat in fg[0].fiber_stats:
        for fib in fg.fibers:
            _packer(fwrite, fib.fiber_stats[stat], 'double')
    # The per-node stats have to be inserted in here as well, with their mean
    # value, since they also occupy a slot in the per-fiber stats section:
    for stat in fg[0].node_stats:
        for fib in fg.fibers:
            _packer(fwrite, np.mean(fib.node_stats[stat]), 'double')
    for stat in fg[0].node_stats:
        for fib in fg.fibers:
            _packer(fwrite, fib.node_stats[stat], 'double')
    if verbose:
        print("Done saving data in file %s" % file_name)
fwrite.close()
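
# Typical usage of `write` (a sketch; `fg` is an osmosis.fibers.FiberGroup
# instance and the file names are placeholders):
#
#     write(fg, 'some_fibers.pdb', verbose=False)
#     write(fg, 'identity_space.pdb', affine=np.eye(4))  # override the affine
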
def fg_from_trk(trk_file, affine=None):
"""
Read data from a trackvis .trk file and create a FiberGroup object
according to the information in it.
"""
    # Read the whole thing in at once (not as a generator), since we are going
    # to use all of it anyway:
read_trk = tv.read(trk_file, as_generator=False)
fibers_trk = read_trk[0]
    # Per default, read the affine from the file header:
    if affine is not None:
        aff = affine
    else:
        hdr = read_trk[1]
        aff = tv.aff_from_hdr(hdr)
# If the header contains a bogus affine, we revert to np.eye(4), so we
# don't get into trouble later:
try:
        np.linalg.inv(aff)
except np.linalg.LinAlgError:
e_s = "trk file contains bogus header, reverting to np.eye(4)"
warnings.warn(e_s)
aff = np.eye(4)
fibers = []
for f in fibers_trk:
        fibers.append(ozf.Fiber(np.array(f[0]).T, affine=aff))
return ozf.FiberGroup(fibers, affine=aff)
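
# Typical usage of `fg_from_trk` (a sketch; the file name is a placeholder):
#
#     fg = fg_from_trk('tracks.trk')                    # affine from the header
#     fg = fg_from_trk('tracks.trk', affine=np.eye(4))  # override the affine
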
def trk2pdb(trk_file, pdb_file):
    """
    Placeholder for conversion from trackvis .trk files to .pdb files.
    Not implemented yet.
    """
    pass


def pdb2trk(pdb_file, trk_file):
    """
    Placeholder for conversion from .pdb files to trackvis .trk files.
    Not implemented yet.
    """
    pass