/
datatypes.py
311 lines (252 loc) · 9.74 KB
/
datatypes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
import logging
from numpy import all, asarray, sign, vstack, ndarray, arange
from operator import and_
from operator import isSequenceType, attrgetter
from collections import Iterable
from fileIO import load, toSettings, fileIOError
import constants
import copy
from fancydict import nesteddict
from contextlib import contextmanager
# Module logger wired to the project-wide level/handler from constants
logger = logging.getLogger(__name__)
logger.setLevel(constants.logLevel)
logger.addHandler(constants.logHandler)
# 'STRICT' makes the _hasData predicates (bottom of file) check attribute
# presence; any other value falls back to a plain length comparison
TYPE_CHECKING = 'STRICT'
# Column order for the concrete Data subclasses defined below
FretData_fields = ('time', 'donor', 'acceptor', 'fret')
TrapData_fields = ('ext', 'f', 'sep')
class Mask(ndarray):
    '''Not used! Kept as a reference example of subclassing ndarray'''
    def __new__(cls, bool_array, *args):
        # Accept only an existing ndarray and view it as a boolean Mask
        assert isinstance(bool_array, ndarray)
        as_bool = asarray(bool_array, dtype='bool')
        return as_bool.view(cls)
    def above(self, above_func):
        # Placeholder; never implemented
        pass
@contextmanager
def transform_error(original_error, to_error):
try:
yield
except original_error:
raise to_error
_index_to_column_error = lambda i: transform_error(IndexError, ValueError("Data does not have column %d" % i))
def _field_properties(fields):
'''Create name,property tuples to access column i in data (DataType)
'''
# Must define fget/fset in this way OUTSIDE of for loop
def property_funcs(i):
def fget(self):
with _index_to_column_error(i):
return self.data[i].view() if len(self.data.shape)==1 else self.data[:,i].view()
def fset(self, value):
with _index_to_column_error(i):
self.data[:,i] = value
return fget,fset
for index,name in enumerate(fields):
fget, fset = property_funcs(index)
yield name, property(fget, fset, doc="Field property %s (%d)" % (name,index))
class DataType(type):
    """Metaclass that equips datatypes with attribute access to _fields.

    Each name in the class's ``_fields`` becomes a property reading the
    matching column of ``self.data`` (e.g. 'ext', 'f', 'sep' for TrapData).
    """
    def __new__(cls, name, bases, clsdict):
        # Inject one column property per declared field before the class
        # object itself is created
        declared = clsdict.get('_fields', [])
        for field_name, prop in _field_properties(declared):
            clsdict[field_name] = prop
        return super(DataType, cls).__new__(cls, name, bases, clsdict)
class Data(object):
    """Base container pairing a numpy array of column data with metadata.

    Subclasses declare a ``_fields`` tuple; the DataType metaclass turns
    each field name into a property accessing the matching column of
    ``self.data``.
    """
    # Python 2 metaclass hook: DataType injects the per-field properties
    __metaclass__ = DataType
    def __init__(self, data, meta={}):
        # NOTE(review): asarray(None) yields a 0-d object array, which is
        # never `None`, so the guard below cannot skip the shape check as
        # apparently intended; a 0-d shape then raises IndexError inside
        # _is_data_shape_ok. Also `meta={}` is a mutable default -- it is
        # only read here, but confirm nesteddict.from_dict copies it.
        data = asarray(data)
        if data is not None and not self._is_data_shape_ok(data.shape):
            logger.warning('TrapData should have fields for {}'.format(self._fields))
        self.data = data
        self._original_data = None
        self.metadata = nesteddict.from_dict(meta)
    @classmethod
    def _is_data_shape_ok(self, shape):
        # True when the array is a single record (1-D of field_size) or a
        # 2-D array whose second axis matches the number of fields.
        # NOTE(review): first parameter of a classmethod is conventionally
        # `cls`; and since `and` binds tighter than `or`, a 1-D shape of
        # the wrong length falls through to shape[1] and raises IndexError.
        field_size = len(self._fields)
        return len(shape) == 1 and shape[0] == field_size or shape[1] == field_size
    @classmethod
    def fromObject(cls, obj):
        """Alternate constructor: copy data/metadata from a Data-like object."""
        try:
            return cls(copy.copy(obj.data), obj.metadata)
        except AttributeError:
            raise ValueError(
                'Constructor method only takes object with Data interface')
    @classmethod
    def fromFile(cls, filename):
        """Alternate constructor: load data (and settings metadata) from *filename*."""
        try:
            meta, data = load(filename, comments=toSettings)
        except fileIOError as e:
            # A non-error fileIOError apparently signals a missing settings
            # header: report it and reload the file as bare data
            if not e.isError:
                print e.strerror
                data = load(filename)
                meta = {}
            else:
                raise
        else:
            meta['filename'] = filename
        me = cls(data, meta)
        return me
    @classmethod
    def aggregate(cls, dataiter, sort_by=None):
        """Stack several Data objects into one, optionally sorted by a field.

        *dataiter* must be a sized iterable (len() is used below, so a bare
        generator would fail the assertion).
        """
        assert isinstance(dataiter, Iterable)
        assert len(dataiter) > 0
        # NOTE(review): `all` here is numpy's all (module import), and
        # numpy.all over a generator expression does not iterate it -- this
        # assertion is effectively always true; confirm whether the builtin
        # all() was intended.
        assert all(isinstance(d, Data)
                   or d is None for d in dataiter)
        if sort_by and sort_by not in cls._fields:
            raise ValueError(
                'sort_by argument must be a field in this data type')
        # Sort stacked rows by the requested column; key yields None for
        # every row when not sorting (stable no-op ordering in Python 2)
        key = lambda e: e[cls._fields.index(sort_by)] if sort_by else None
        data = map(attrgetter('data'), dataiter)
        return cls(asarray(sorted(vstack(data), key=key)))
    # Alias: flattening a collection is the same operation as aggregating it
    flatten = aggregate
    @classmethod
    def fromFields(cls, *args, **meta):
        """Alternate constructor: build from one positional array per field."""
        if len(args) != len(cls._fields):
            raise ValueError(
                "Number of arguments to {0} must match _fields".format(cls.__name__))
        # Transpose so each positional argument becomes a column
        return cls(asarray(args).T, meta)
    def copy(self):
        """Return a new instance holding a (shallow) copy of the data."""
        return self.__class__.fromObject(self)
    @property
    def shape(self):
        # Shape of the underlying numpy array
        return self.data.shape
    def __len__(self):
        return len(self.data)
    def __eq__(self, other):
        # Data-like objects compare on data AND metadata; anything else is
        # compared against the raw array (an elementwise numpy result)
        if hasattr(other, 'data'):
            return all(self.data == other.data) and self.metadata == other.metadata
        else:
            return self.data == other
    def __ne__(self, other):
        # NOTE(review): when __eq__ returns an elementwise array (non-Data
        # operand), `not` raises "truth value ... is ambiguous"
        return not self == other
    def __add__(self, other):
        # Concatenate rows.
        # NOTE(review): metadata of both operands is dropped here.
        return type(self)(vstack((self.data, other.data)))
    def where(self, *conditions):
        '''Returns data where ALL conditions are true
        '''
        # AND all boolean masks together, then index with the result
        return self[reduce(and_, conditions)]
    def at(self, **kwargs):
        """Return the first row where every given field >= its value."""
        return self.where(*[getattr(self,field)>=value for field,value in kwargs.iteritems()])[0]
    @property
    def T(self):
        # Transposed view: fields as rows
        return self.data.T
    def __iter__(self):
        # Iteration yields one array per FIELD (column), not per row
        return iter(self.T)
    def __getitem__(self, key):
        # 1-D (single record) indexing returns raw values; otherwise wrap
        # the selected rows in a new instance sharing this metadata
        if len(self.data.shape) == 1:
            return self.data[key]
        return type(self)(self.data[key].view(), self.metadata)
    def __repr__(self):
        return repr(self.data)
    @classmethod
    def _normalizeLimits(cls, limits, min_max, assume_max_limit=True):
        """Expand *limits* into a (min, max) pair, filling gaps from *min_max*.

        A scalar or single-element limit is taken as the max limit when
        *assume_max_limit* is true, otherwise as the min limit.
        NOTE(review): falls off the end (returns None) for sequences of
        length 0 or >2; operator.isSequenceType exists only in Python 2.
        """
        if limits is None:
            return min_max
        elif not isSequenceType(limits):
            limits = [limits]
        if len(limits) == 2:
            return limits
        elif len(limits) == 1:
            if assume_max_limit:
                return min_max[0], limits[0]
            else:
                return limits[0], min_max[1]
class TrapData(Data):
    """Optical-trap data with extension ('ext'), force ('f') and trap
    separation ('sep') columns."""
    _fields = TrapData_fields
    @property
    def time(self):
        """Time axis derived from the 'sampling_time' metadata entry.

        Returns one time point per data row, starting at one sampling
        interval.  Raises AttributeError when 'sampling_time' is missing.
        """
        if 'sampling_time' not in self.metadata:
            raise AttributeError(
                'sampling_time must be set in metadata to calculate time')
        # arange(1, len+1) yields exactly len(self) points; the original
        # arange(1, len(self)) was one element short of the data length
        return arange(1, len(self)+1)*self.metadata['sampling_time']
    def maskFromLimits(self, x=None, f=None, limits=()):
        """Boolean mask selecting rows within extension (*x*) and force
        (*f*) limits.

        Limits are normalized by Data._normalizeLimits: None means the
        data's own min/max; a scalar fills in one side.  When *limits* is a
        (start, stop) row slice, the mask covers only that slice.
        """
        if x is None and f is None:
            raise ValueError('Must specify either x limits or f limits')
        if limits:
            start, stop = limits
            ext_fit, f_fit = self.ext[start:stop], self.f[start:stop]
        else:
            ext_fit, f_fit = self.ext, self.f
        # A bare force limit is taken as the maximum force...
        min_f, max_f = TrapData._normalizeLimits(f,
                                                 min_max=(
                                                     min(f_fit), max(f_fit)),
                                                 assume_max_limit=True
                                                 )
        # ...while a bare extension limit is taken as the minimum extension
        min_ext, max_ext = TrapData._normalizeLimits(x,
                                                     min_max=(
                                                         min(ext_fit), max(ext_fit)),
                                                     assume_max_limit=False
                                                     )
        between = lambda s, a, b: (s >= a) & (s <= b)
        return between(ext_fit, min_ext, max_ext) & between(f_fit, min_f, max_f)
    def mask_from_interval(self, ext, f=None):
        """Convenience wrapper: mask from an extension interval (and
        optional force limits)."""
        return self.maskFromLimits(ext, f)
    def mask_above(self, above):
        '''Return boolean mask: True where force lies above above(ext)'''
        assert callable(above)
        # Keep parameter and result distinct (the original rebound `above`
        # with its own result); the docstring also claimed "2 masks" but a
        # single mask is returned
        return self.f > above(self.ext)
    def make_masks(self, intervals):
        """Return one mask per interval (see mask_from_interval)."""
        return map(self.mask_from_interval, intervals)
    def select(self, x=None, f=None, ext=None, limits=(0, -1)):
        """Return the subset of rows within the given ext/force limits.

        *ext* is an alias for *x*; *x* takes precedence when both are given
        (including x == 0, which the original `x or ext` wrongly discarded).
        """
        if x is None:
            x = ext
        return self[self.maskFromLimits(x, f, limits)]
    @property
    def fec(self):
        """Return (ext,force) data
        """
        # Iterating self yields one array per field: ext, f, sep
        x, f, s = self
        return x, f
    def meanStiffness(self):
        """Combined (series) stiffness: inverse of the summed inverse trap
        stiffnesses, from metadata or the project default."""
        inverseAverage = lambda args: 1 / sum(map(lambda x: 1. / x, args))
        return inverseAverage(self.metadata.get('stiffness', constants.stiffness))
    def adjustOffset(self, ext=None, force=None):
        """Shift extension down by *ext* and force up by *force*, in place."""
        if ext is not None:
            self.ext -= ext
        if force is not None:
            # Fixed: was `self.force += force`, but TrapData has no 'force'
            # attribute -- the field property is named 'f'
            self.f += force
    def recalculate(self, stiffness=None):
        """Recompute force and extension for a new 2-tuple trap *stiffness*.

        The stiffness currently stored in metadata is taken as the
        calibration the data was acquired with; the new stiffness is stored
        back to metadata.  Returns self for chaining.
        """
        if stiffness and len(stiffness) != 2:
            raise ValueError('Stiffness must be 2-tuple')
        current_k = self.metadata.get('stiffness', stiffness)
        if current_k is None:
            # Fail early with a clear message instead of min(None) below
            raise ValueError(
                'stiffness must be given or already present in metadata')
        new_k = stiffness or current_k
        self.metadata['stiffness'] = tuple(new_k)
        beadRadii = self.metadata.get('bead_radii', constants.sumOfBeadRadii)
        # Bead displacement implied by the force under the old calibration
        displacement = self.f / min(current_k)
        ratio = 1 + min(new_k) / max(new_k)
        self.f = displacement * min(new_k)
        self.ext = self.sep - beadRadii - displacement * ratio
        return self
class FretData(Data):
    # FRET data with time, donor, acceptor, and fret columns
    _fields = FretData_fields
    def maskFromLimits(self, time, limits=(0, -1)):
        # NOTE(review): stub -- always returns None, so select() below ends
        # up indexing with None (numpy newaxis) rather than a boolean mask
        return
    def select(self, time=None):
        # Intended to mirror TrapData.select; inert until maskFromLimits
        # is actually implemented
        return self[self.maskFromLimits(time)]
def search_monotonic(ar, value):
    """Return the index in monotonically increasing *ar* nearest to *value*.

    Returns 0 when value <= ar[0] and -1 (the last element) when
    value >= ar[-1]; otherwise the index of whichever neighbor of the
    sign change is closer to *value*.
    """
    shifted = ar - value
    if value <= ar[0]:
        return 0
    elif value >= ar[-1]:
        return -1
    start_sign = sign(shifted[0])
    for n, (current, last) in enumerate(and_prev(shifted)):
        if sign(current) != start_sign:
            # The sign change brackets `value`: pick the closer neighbor.
            # Fixed: was min(n - 1, 0), which always returned 0 whenever
            # the previous element was the closer one (n >= 1 here).
            return n if abs(current) < abs(last) else max(n - 1, 0)
    return -1
def and_prev(iterable, default=None):
    """Yield (item, previous item) pairs; the first pair uses *default*."""
    last = default
    for x in iterable:
        yield x, last
        last = x
def _hasData(datatype):
    """Build a predicate testing whether an object looks like *datatype*.

    *datatype* may be a Data subclass (its ``_fields`` are used) or a bare
    sequence of field names.  Under STRICT checking the object must expose
    every field as an attribute; otherwise only lengths are compared.
    """
    fields = getattr(datatype, '_fields', datatype)
    if TYPE_CHECKING == 'STRICT':
        # Build the list eagerly so the module-level (numpy) `all` is
        # handed a real sequence, exactly like the original map() result
        return lambda obj: all([hasattr(obj, name) for name in fields])
    else:
        return lambda obj: len(fields) == len(obj)
# Ready-made predicates: does an object expose the fret / trap fields?
hasFretData = _hasData(FretData)
hasTrapData = _hasData(TrapData)
# True only when the object carries both the fret and the trap fields
hasTrapFretData = lambda x: hasFretData(x) and hasTrapData(x)