def gen():
    """
    Yield one `Trajectory` per entry of ``Ts``.

    ``Ts``, ``traces``, ``N`` and ``d`` are taken from the enclosing scope.
    For the entry at position ``iT`` with value ``T``, the first ``T`` rows of
    ``traces`` are used, and ``N`` consecutive blocks of ``d`` columns
    (starting at column ``iT * N * d``) are passed on as a list to
    ``Trajectory.fromArray``.
    """
    for length_index, length in enumerate(Ts):
        pieces = []
        for n in range(N):
            # Column block belonging to the n-th piece of this trajectory
            block = length_index * N + n
            pieces.append(traces[:length, (block * d):((block + 1) * d)])
        yield Trajectory.fromArray(pieces)
def gen():
    """
    Yield a resampled ``(Trajectory, tags)`` pair for each entry of ``dataset``.

    ``dataset``, ``msd`` and ``sampleMSD`` are taken from the enclosing scope.
    For every trajectory, new traces are drawn via ``sampleMSD`` (with the
    mean subtracted), shifted by the nan-mean of the original data along axis
    1, and set to NaN wherever the original data is NaN. The meta data is
    deep-copied, except for the entries that are derived from the data itself.

    Raises
    ------
    RuntimeError
        if ``sampleMSD`` fails with a ``np.linalg.LinAlgError``; the original
        error is attached as ``__cause__``.
    """
    for traj, mytags in dataset(giveTags=True):
        try:
            traces = sampleMSD(msd[:len(traj)], n=traj.N * traj.d, subtractMean=True)
        except np.linalg.LinAlgError as err:  # pragma: no cover
            # Chain explicitly, such that the numerical root cause stays visible
            raise RuntimeError(
                "Could not generate trajectories from provided (or ensemble) MSD. Try using something cleaner."
            ) from err

        # Split the flat list of traces into traj.N blocks of traj.d columns
        newdata = np.array([
            traces[:, (i * traj.d):((i + 1) * traj.d)]
            for i in range(traj.N)
        ])
        # Traces were sampled mean-free, so put the original mean back in
        newdata += np.nanmean(traj.data, axis=1, keepdims=True)
        # Reproduce the pattern of missing data
        newdata[np.isnan(traj.data)] = np.nan

        newmeta = deepcopy(traj.meta)
        # Remove those meta entries that explicitly depend on the data
        for key in ['MSD', 'MSDmeta', 'chi2scores']:
            try:
                del newmeta[key]
            except KeyError:
                pass

        yield (Trajectory.fromArray(newdata, **newmeta), deepcopy(mytags))
def trajectory_from_loopingprofile(self, profile, localization_error=0., missing_frames=None):
    """
    Sample a trajectory for the given looping profile

    Parameters
    ----------
    profile : sequence of states
        one state per frame; each state is used as index into
        ``self.distributions``.
    localization_error : scalar, optional
        *not* added to the data (``self.distributions`` are expected to
        contain it already); only written to the meta data of the returned
        trajectory, repeated ``self.d`` times.
    missing_frames : None, scalar, or array-like of int, optional
        frames to set to NaN. ``None`` or ``0`` means no missing frames. A
        scalar strictly between 0 and 1 is the probability for each frame to
        be missing; any other scalar is the number of frames to remove
        (chosen at random, without replacement). An array-like (including
        plain lists) gives the indices of the missing frames explicitly.

    Returns
    -------
    Trajectory
    """
    # Pre-proc missing_frames
    if missing_frames is None:
        missing_frames = np.array([], dtype=int)
    if np.isscalar(missing_frames):
        if 0 < missing_frames < 1:
            missing_frames = np.nonzero(
                np.random.rand(len(profile)) < missing_frames)[0]
        else:
            # np.random.choice requires an integer size; make sure that a
            # float 0. ("no missing frames") does not trip over this
            size = 0 if missing_frames == 0 else missing_frames
            missing_frames = np.random.choice(len(profile), size=size, replace=False)
    # np.asarray, such that lists / tuples of indices work as well
    missing_frames = np.asarray(missing_frames).astype(int)

    # Note that the distributions in the model give us only the length, not
    # the orientation. So we also have to sample unit vectors
    # Furthermore, localization_error should not be added, since
    # self.distributions already contain it. It will be written to the
    # meta entry though!
    magnitudes = np.array(
        [self.distributions[state].rvs() for state in profile[:]])
    data = np.random.normal(size=(len(magnitudes), self.d))
    # Rescale each random direction to the sampled magnitude
    data *= np.expand_dims(magnitudes / np.linalg.norm(data, axis=1), 1)
    data[missing_frames, :] = np.nan

    return Trajectory.fromArray(
        data,
        localization_error=np.array(self.d * [localization_error]),
        loopingprofile=profile,
    )
def trajectory_from_loopingprofile(
        self,
        profile,
        localization_error=None,
        missing_frames=None,
):
    """
    Sample a trajectory for the given looping profile

    The first frame is drawn from ``conf_ss()`` of the model associated with
    the first state; each subsequent frame is obtained by applying
    ``evolve()`` of the model associated with the state of that frame. The
    configurations are projected through ``self.measurement``.

    Parameters
    ----------
    profile : sequence of states
        one state per frame; each state is used as index into ``self.models``.
    localization_error : None, scalar, or array-like of shape (d,), optional
        scale of the Gaussian noise added to each spatial dimension. If
        ``None``, use ``self.localization_error``.
    missing_frames : None, scalar, or array-like of int, optional
        frames to set to NaN. ``None`` or ``0`` means no missing frames. A
        scalar strictly between 0 and 1 is the probability for each frame to
        be missing; any other scalar is the number of frames to remove
        (chosen at random, without replacement). An array-like (including
        plain lists) gives the indices of the missing frames explicitly.

    Returns
    -------
    Trajectory

    Raises
    ------
    ValueError
        if no localization error is available, or it does not have shape
        ``(self.d,)`` after pre-processing.
    """
    # Pre-processing
    # localization_error
    if localization_error is None:
        if self.localization_error is None:
            raise ValueError(
                "Need to specify either localization_error or model.localization_error"
            )  # pragma: no cover
        localization_error = self.localization_error
    if np.isscalar(localization_error):
        localization_error = self.d * [localization_error]
    localization_error = np.asarray(localization_error)
    if localization_error.shape != (self.d, ):
        raise ValueError(
            "Did not understand localization_error")  # pragma: no cover

    # missing_frames
    if missing_frames is None:
        missing_frames = np.array([], dtype=int)
    if np.isscalar(missing_frames):
        if 0 < missing_frames < 1:
            missing_frames = np.nonzero(
                np.random.rand(len(profile)) < missing_frames)[0]
        else:
            # np.random.choice requires an integer size; make sure that a
            # float 0. ("no missing frames") does not trip over this
            size = 0 if missing_frames == 0 else missing_frames
            missing_frames = np.random.choice(len(profile), size=size, replace=False)
    # np.asarray, such that lists / tuples of indices work as well
    missing_frames = np.asarray(missing_frames).astype(int)

    # Assemble trajectory
    data = np.empty((len(profile), self.d), dtype=float)
    data[:] = np.nan

    model = self.models[profile[0]]
    conf = model.conf_ss()
    data[0, :] = self.measurement @ conf

    for i in range(1, len(profile)):
        model = self.models[profile[i]]
        conf = model.evolve(conf)
        data[i, :] = self.measurement @ conf

    # Kick out frames that should be missing
    data[missing_frames, :] = np.nan

    # Return as Trajectory (missing frames stay NaN when adding the noise)
    noise = localization_error[np.newaxis, :]
    return Trajectory.fromArray(
        data + noise * np.random.normal(size=data.shape),
        localization_error=localization_error,
        loopingprofile=profile,
    )
def trajectory_from_loopingprofile(self, profile,
                                   localization_error=None,
                                   missing_frames=None,
                                   ):
    """
    Generative model

    Parameters
    ----------
    profile : Loopingprofile
        the profile from whose associated ensemble to sample
    localization_error : float or (d,) np.ndarray, dtype=float
        see `MultiStateModel.trajectory_from_loopingprofile`. If ``None``,
        fall back on ``self.localization_error``.
    missing_frames : None, float in [0, 1), int, or np.ndarray
        see `MultiStateModel.trajectory_from_loopingprofile`

    Returns
    -------
    Trajectory

    Raises
    ------
    ValueError
        if neither `localization_error` nor ``self.localization_error`` is
        given.
    """
    # Resolve the localization error to use, then let the base class
    # bring both arguments into their canonical form
    if localization_error is None:
        if self.localization_error is None:
            raise ValueError("Need to specify either localization_error or model.localization_error")  # pragma: no cover
        localization_error = self.localization_error

    localization_error = super().trajectory_from_loopingprofile(
        profile,
        preproc='localization_error',
        localization_error=localization_error,
    )
    missing_frames = super().trajectory_from_loopingprofile(
        profile,
        preproc='missing_frames',
        missing_frames=missing_frames,
    )

    # Walk through the profile: steady state for the first frame, then
    # evolve with whichever model is associated with the current state
    n_frames = len(profile)
    positions = np.full((n_frames, self.d), np.nan, dtype=float)

    conf = self.models[profile[0]].conf_ss()
    positions[0, :] = self.measurement @ conf
    for frame in range(1, n_frames):
        conf = self.models[profile[frame]].evolve(conf)
        positions[frame, :] = self.measurement @ conf

    # Missing frames are blanked before the noise goes on top (NaN stays NaN)
    positions[missing_frames, :] = np.nan
    positions += localization_error[None, :] * np.random.normal(size=positions.shape)

    return Trajectory.fromArray(
        positions,
        localization_error=localization_error,
        loopingprofile=profile,
    )
def trajectory_from_loopingprofile(self, profile, localization_error=0., missing_frames=None):
    """
    Generative model

    Parameters
    ----------
    profile : Loopingprofile
        the profile from whose associated ensemble to sample
    localization_error : float or (d,) np.ndarray, dtype=float
        see `MultiStateModel.trajectory_from_loopingprofile`. The
        `distributions` of the model should already account for the
        localization error, so it is *not* added to the trajectory here, but
        only stored in ``traj.meta['localization_error']``.
    missing_frames : None, float in [0, 1), int, or np.ndarray
        see `MultiStateModel.trajectory_from_loopingprofile`

    Returns
    -------
    Trajectory

    Notes
    -----
    A `FactorizedModel` only knows the distribution of the scalar distance
    between the points, i.e. it assumes that the distribution of distance
    vectors is isotropic. A trajectory is thus generated by drawing a
    magnitude from the distribution of the current state and a direction
    uniformly from the unit sphere.
    """
    # Let the base class bring the arguments into canonical form
    localization_error = super().trajectory_from_loopingprofile(
        profile,
        preproc='localization_error',
        localization_error=localization_error,
    )
    missing_frames = super().trajectory_from_loopingprofile(
        profile,
        preproc='missing_frames',
        missing_frames=missing_frames,
    )

    # One distance per frame, drawn from the distribution of that frame's
    # state. These already include the localization error, so no extra noise
    # is added below; it only ends up in the meta data.
    lengths = np.array([self.distributions[state].rvs() for state in profile[:]])

    # Isotropic directions: normalize Gaussian vectors, scale to the lengths
    vectors = np.random.normal(size=(len(lengths), self.d))
    scale = lengths / np.linalg.norm(vectors, axis=1)
    vectors *= np.expand_dims(scale, 1)

    vectors[missing_frames, :] = np.nan

    return Trajectory.fromArray(
        vectors,
        localization_error=localization_error,
        loopingprofile=profile,
    )
def trajectory_from_loopingtrace(self, loopingtrace, localization_error=0., d=3):
    """
    Sample a trajectory for the given looping trace

    Parameters
    ----------
    loopingtrace : Loopingtrace
        provides the states (``loopingtrace.state``), the frames they belong
        to (``loopingtrace.t``), and the total number of frames
        (``loopingtrace.T``). Frames not listed in ``loopingtrace.t`` remain
        NaN.
    localization_error : scalar, optional
        *not* added to the data (``self.distributions`` already contain it);
        only written to the meta data, repeated `d` times.
    d : int, optional
        number of spatial dimensions

    Returns
    -------
    Trajectory
    """
    # The model distributions give only the length of the distance vector;
    # the orientation is sampled isotropically on top of that.
    lengths = np.array(
        [self.distributions[state].rvs() for state in loopingtrace.state])
    directions = np.random.normal(size=(len(lengths), d))
    scale = lengths / np.linalg.norm(directions, axis=1)
    directions *= np.expand_dims(scale, 1)

    # Everything that is not covered by the looping trace stays NaN
    positions = np.full((loopingtrace.T, d), np.nan)
    positions[loopingtrace.t, :] = directions

    return Trajectory.fromArray(
        positions,
        localization_error=np.array(d * [localization_error]),
        loopingtrace=loopingtrace,
    )
def trajectory_from_loopingtrace(self, loopingtrace, localization_error=None, d=3):
    """
    Sample a trajectory for the given looping trace

    The first listed frame is drawn from ``conf_ss`` of the model associated
    with the first state; every following one by ``evolve``-ing the current
    configuration with the model of the respective state, over the number of
    frames elapsed since the previous entry. Gaussian noise scaled by
    `localization_error` is added at the end.

    Parameters
    ----------
    loopingtrace : Loopingtrace
        indexing gives the state, ``loopingtrace.t`` the frame of each entry,
        ``loopingtrace.T`` the total number of frames. Frames not listed in
        ``loopingtrace.t`` remain NaN.
    localization_error : None, scalar, or array-like of shape (d,), optional
        if ``None``, use ``self.localization_error``.
    d : int, optional
        number of spatial dimensions

    Returns
    -------
    Trajectory

    Raises
    ------
    ValueError
        if no localization error is available, or it does not have shape
        ``(d,)`` after pre-processing.
    """
    # Bring localization_error into the form of a (d,) array
    if localization_error is None:
        if self.localization_error is None:
            raise ValueError(
                "Need to specify either localization_error or model.localization_error"
            )
        localization_error = self.localization_error
    if np.isscalar(localization_error):
        localization_error = d * [localization_error]
    localization_error = np.asarray(localization_error)
    if localization_error.shape != (d, ):
        raise ValueError("Did not understand localization_error")

    positions = np.full((loopingtrace.T, d), np.nan)

    # First entry: start from the steady state of the first model
    model = self.models[loopingtrace[0]]
    conf = model.conf_ss(True, d)
    positions[loopingtrace.t[0], :] = model.measurement @ conf

    # Remaining entries: propagate over the gap to the previous entry
    for i in range(1, len(loopingtrace)):
        model = self.models[loopingtrace[i]]
        gap = loopingtrace.t[i] - loopingtrace.t[i - 1]
        conf = model.evolve(conf, True, gap)
        positions[loopingtrace.t[i], :] = model.measurement @ conf

    noisy = positions + localization_error[np.newaxis, :] * np.random.normal(size=positions.shape)
    return Trajectory.fromArray(
        noisy,
        localization_error=localization_error,
        loopingtrace=loopingtrace,
    )
def csv(filename, columns=['x', 'y', 't', 'id'], tags=None, meta_post={}, **kwargs):
    """
    Load data from a .csv file.

    This uses ``np.genfromtxt``, and all kwargs are forwarded to it. By
    default, we assume the delimiter ``','`` and utf8 encoding for string data,
    but these can of course be changed. Refer to ``numpy.genfromtxt``.

    Parameters
    ----------
    filename : string or file-like object
        the file to be read
    columns : list
        how to interpret the columns in the file. Use any of these identifiers:
        ``{'x', 'y', 'z', 'x2', 'y2', 'z2', 't', 'id', None}``, where ``'t'``
        (mandatory) is the frame number, ``'id'`` (mandatory) the trajectory
        id, and the remaining ones can be used to indicate spatial components
        of single or double-locus trajectories. Use ``None`` to indicate a
        column that should be ignored. Columns beyond the list given here will
        be ignored in any case. Finally, the data for any str identifier not
        matching one of the above will be written to a corresponding entry in
        the trajectory's `meta` dict.
    tags : str, list of str or set of str, optional
        the tag(s) to be associated with trajectories from this file
    meta_post : dict, optional
        post-processing options for the `meta` data. Keys should be `meta`
        field names, values can be ``"unique"``, ``"mean"``, or ``"nanmean"``.
        With ``"unique"``, all the values in the corresponding column should
        be the same, and only that value (instead of the whole array) will be
        written into the meta field. With ``"mean"`` we simply take the mean
        of the array; ``"nanmean"`` converts to float and takes the mean
        ignoring NaNs. Meta columns without an entry here are assumed to
        contain floats and are stored as array over frames, with NaN for
        frames that are not present.

    Returns
    -------
    TaggedSet
        the loaded data set

    Raises
    ------
    AssertionError
        if `columns` lacks a mandatory or implied identifier
    ValueError
        if `columns` contains no spatial coordinates, refers to more columns
        than the file has, or `meta_post` contains an invalid option
    RuntimeError
        if a meta column marked ``"unique"`` is not unique within a trajectory

    Examples
    --------
    This function can be used to load data from ``pandas.DataFrame`` tables, if
    they conform to the format described above:

    >>> import io
    ... import pandas as pd
    ... import tracklib as tl
    ...
    ... # Set up a DataFrame containing some dummy data
    ... # Caveat to pay attention to: the order of the columns is important!
    ... df = pd.DataFrame()
    ... df['frame_no'] = [1, 2, 3]
    ... df['trajectory_id'] = [4, 4, 4]
    ... df['coord1'] = [1, 2, 3]
    ... df['coord2'] = [4, 5, 6]
    ...
    ... csv_stream = io.StringIO(df.to_csv())
    ... dataset = tl.io.load.csv(csv_stream,
    ...                          [None, 't', 'id', 'x', 'y'], # first column will be index
    ...                          delimiter=',',               # pandas' default
    ...                          skip_header=1,               # pandas prints a header line
    ...                         )
    """
    # Map identifier --> column index; isinstance() excludes None's while
    # still accepting str subclasses (e.g. np.str_)
    col_inds = {key: i for i, key in enumerate(columns) if isinstance(key, str)}
    keys = col_inds.keys()
    assert 'id' in keys, "columns has to contain 'id'"
    assert 't' in keys, "columns has to contain 't'"

    # Get shape of trajectory and check that the given keys make sense
    if 'z' in keys:
        d = 3
        assert 'y' in keys, "'z' given without 'y'"
        assert 'x' in keys, "'z' given without 'x'"
    elif 'y' in keys:
        d = 2
        assert 'x' in keys, "'y' given without 'x'"
    elif 'x' in keys:
        d = 1
    else:  # pragma: no cover
        raise ValueError(
            "No valid coordinates found in specification: {}".format(columns))

    # data_keys = ['x', 'x2', 'y', 'y2', ...]
    data_keys = sorted(keys & {'x', 'y', 'z', 'x2', 'y2', 'z2'})

    # Only the spatial identifiers decide whether this is two-locus data; a
    # meta column that happens to contain a '2' in its name should not.
    N = 1
    if any('2' in key for key in data_keys):
        N = 2
        for key in data_keys:
            assert key[0] in data_keys, \
                "'{}' missing for two-locus data".format(key[0])
            assert key[0] + '2' in data_keys, \
                "'{}' missing for two-locus data".format(key[0] + '2')

    # Everything that is not id, frame, or coordinate is meta data. Fix the
    # order (by column), since we iterate over these multiple times.
    meta_keys = sorted(keys - {'id', 't', *data_keys}, key=col_inds.get)

    # Read data
    gft_kwargs = dict(delimiter=',', dtype=None, encoding='utf8')
    gft_kwargs.update(kwargs)
    data = np.genfromtxt(filename, **gft_kwargs)

    # This feels suboptimal... maybe there's a better way?
    data_cols = [col_inds[key] for key in data_keys]
    meta_cols = [col_inds[key] for key in meta_keys]
    try:
        traj_ids = np.array([line[col_inds['id']] for line in data])
        frames = np.array([line[col_inds['t']] for line in data]).astype(int)
        # shape: (-1, N*d), sorted x, x2, y, ...
        coords = np.array([[line[col] for col in data_cols]
                           for line in data]).astype(float)
        meta_arrays = [np.array([line[col] for line in data])
                       for col in meta_cols]
    except IndexError as err:
        raise ValueError(
            "Too many columns for file. Did you use the right delimiter?"
        ) from err
    del data

    # Assemble data set
    out = TaggedSet()
    for myid in set(traj_ids):
        ind = traj_ids == myid
        # (frames, d*N) --> (N, frames, d)
        mydata = np.moveaxis(coords[ind].reshape((-1, d, N)), 2, 0)

        # Frame numbers relative to the first frame of this trajectory
        myt = frames[ind]
        myt -= np.min(myt)
        n_frames = np.max(myt) + 1

        # Frames that do not appear in the file remain NaN
        trajdata = np.full((N, n_frames, d), np.nan, dtype=float)
        trajdata[:, myt, :] = mydata

        meta = {}
        for key, column in zip(meta_keys, meta_arrays):
            mymeta = column[ind]
            if key in meta_post:
                post = meta_post[key]
                if post == 'unique':
                    ms = set(mymeta)
                    if len(ms) > 1:
                        raise RuntimeError(
                            "Data in column '{}' is not unique for trajectory with id {}"
                            .format(key, myid))
                    meta[key] = ms.pop()
                elif post == 'mean':
                    meta[key] = np.mean(mymeta)
                elif post == 'nanmean':
                    meta[key] = np.nanmean(mymeta.astype(float))
                else:  # pragma: no cover
                    raise ValueError(f"invalid meta post-proc: {post}")
            else:
                # assume that we have floats and fill with nan's
                meta[key] = np.full(n_frames, np.nan, dtype=float)
                meta[key][myt] = mymeta

        out.add(Trajectory.fromArray(trajdata, **meta), tags)

    return out