Beispiel #1
0
 def gen():
     """
     Yield one `Trajectory` per entry of ``Ts``.

     ``Ts``, ``traces``, ``N`` and ``d`` are taken from the enclosing scope.
     For the ``iT``-th entry ``T`` of ``Ts``, the trajectory is assembled
     from ``N`` consecutive blocks of ``d`` columns of ``traces`` (starting
     at block ``iT * N``), each cut to its first ``T`` rows.
     """
     for group, length in enumerate(Ts):
         first_block = group * N
         blocks = []
         for n in range(N):
             start = (first_block + n) * d
             blocks.append(traces[:length, start:start + d])
         yield Trajectory.fromArray(blocks)
Beispiel #2
0
    def gen():
        """
        Yield ``(Trajectory, tags)`` pairs with data re-sampled from ``msd``.

        For each trajectory in ``dataset`` (from the enclosing scope) new
        traces are drawn with ``sampleMSD``, shifted by the per-locus mean of
        the original data, and masked with nan wherever the original data is
        nan. Meta entries are copied over, except those derived from the
        data itself.

        Raises
        ------
        RuntimeError
            if ``sampleMSD`` fails with a ``np.linalg.LinAlgError``
        """
        # Meta entries that explicitly depend on the data and thus must not
        # be carried over to the re-sampled trajectory
        data_dependent = ['MSD', 'MSDmeta', 'chi2scores']

        for (traj, mytags) in dataset(giveTags=True):
            n_traces = traj.N * traj.d
            try:
                traces = sampleMSD(msd[:len(traj)],
                                   n=n_traces,
                                   subtractMean=True)
            except np.linalg.LinAlgError:  # pragma: no cover
                raise RuntimeError(
                    "Could not generate trajectories from provided (or ensemble) MSD. Try using something cleaner."
                )

            # Split the flat column layout into one block of d columns per locus
            per_locus = []
            for i in range(traj.N):
                lo = i * traj.d
                per_locus.append(traces[:, lo:lo + traj.d])
            newdata = np.array(per_locus)

            # Restore the original mean position and missing-frame pattern
            newdata += np.nanmean(traj.data, axis=1, keepdims=True)
            newdata[np.isnan(traj.data)] = np.nan

            newmeta = deepcopy(traj.meta)
            for key in data_dependent:
                try:
                    del newmeta[key]
                except KeyError:
                    pass

            newtraj = Trajectory.fromArray(newdata, **newmeta)
            yield (newtraj, deepcopy(mytags))
Beispiel #3
0
    def trajectory_from_loopingprofile(self,
                                       profile,
                                       localization_error=0.,
                                       missing_frames=None):
        """
        Sample a trajectory for a given looping profile.

        Parameters
        ----------
        profile : sequence of states
            one state per frame; each state is used as index into
            ``self.distributions``
        localization_error : scalar or (d,) array-like, optional
            *not* added to the sampled data (``self.distributions`` already
            contain it); only written to the meta data of the trajectory. A
            scalar is repeated for all ``self.d`` dimensions.
        missing_frames : None, float in (0, 1), int, or array of int, optional
            frames to be set to nan. ``None``: keep all frames. Scalar
            strictly between 0 and 1: drop each frame independently with
            that probability. Any other scalar: number of frames to drop,
            chosen without replacement. Anything else is used directly as
            index into the frames.

        Returns
        -------
        Trajectory

        Raises
        ------
        ValueError
            if `localization_error` is neither 0-dimensional nor of shape
            ``(self.d,)``
        """
        # Pre-proc localization_error
        # 0-dimensional input is repeated along the spatial dimensions;
        # anything else has to be one value per dimension already (repeating
        # it would give a (d, d) array).
        if np.ndim(localization_error) == 0:
            localization_error = np.array(self.d * [localization_error])
        else:
            localization_error = np.asarray(localization_error)
            if localization_error.shape != (self.d, ):
                raise ValueError("Did not understand localization_error")

        # Pre-proc missing_frames
        if missing_frames is None:
            missing_frames = np.array([], dtype=int)
        if np.isscalar(missing_frames):
            if 0 < missing_frames < 1:
                missing_frames = np.nonzero(
                    np.random.rand(len(profile)) < missing_frames)[0]
            else:
                missing_frames = np.random.choice(len(profile),
                                                  size=missing_frames,
                                                  replace=False)
                missing_frames = missing_frames.astype(int)

        # Note that the distributions in the model give us only the length, not
        #   the orientation. So we also have to sample unit vectors
        # Furthermore, localization_error should not be added, since
        #   self.distributions already contain it. It will be written to the
        #   meta entry though!
        magnitudes = np.array(
            [self.distributions[state].rvs() for state in profile[:]])
        data = np.random.normal(size=(len(magnitudes), self.d))
        data *= np.expand_dims(magnitudes / np.linalg.norm(data, axis=1), 1)
        data[missing_frames, :] = np.nan

        return Trajectory.fromArray(
            data,
            localization_error=localization_error,
            loopingprofile=profile,
        )
Beispiel #4
0
    def trajectory_from_loopingprofile(
        self,
        profile,
        localization_error=None,
        missing_frames=None,
    ):
        """
        Sample a trajectory for a given looping profile.

        Parameters
        ----------
        profile : sequence of states
            one state per frame; each state is used as index into
            ``self.models``
        localization_error : None, scalar, or (d,) array-like, optional
            standard deviation of the Gaussian noise added to each dimension.
            Scalars are repeated for all ``self.d`` dimensions. If ``None``,
            ``self.localization_error`` is used.
        missing_frames : None, float in (0, 1), int, or array of int, optional
            frames to be set to nan. ``None``: keep all frames. Scalar
            strictly between 0 and 1: drop each frame independently with
            that probability. Any other scalar: number of frames to drop,
            chosen without replacement. Anything else is used directly as
            index into the frames.

        Returns
        -------
        Trajectory

        Raises
        ------
        ValueError
            if no localization error is available, or if it does not have
            shape ``(self.d,)`` after broadcasting scalars
        """
        # --- localization_error ---
        if localization_error is None:
            localization_error = self.localization_error
            if localization_error is None:
                raise ValueError(
                    "Need to specify either localization_error or model.localization_error"
                )  # pragma: no cover
        localization_error = np.asarray(
            self.d * [localization_error]
            if np.isscalar(localization_error) else localization_error)
        if localization_error.shape != (self.d, ):
            raise ValueError(
                "Did not understand localization_error")  # pragma: no cover

        # --- missing_frames ---
        n_frames = len(profile)
        if missing_frames is None:
            missing_frames = np.array([], dtype=int)
        elif np.isscalar(missing_frames):
            if 0 < missing_frames < 1:
                dropped = np.random.rand(n_frames) < missing_frames
                missing_frames = np.nonzero(dropped)[0]
            else:
                chosen = np.random.choice(n_frames,
                                          size=missing_frames,
                                          replace=False)
                missing_frames = chosen.astype(int)

        # --- assemble the noise-free trajectory ---
        data = np.full((n_frames, self.d), np.nan, dtype=float)

        # First frame: steady state conformation of the initial model
        conf = self.models[profile[0]].conf_ss()
        data[0, :] = self.measurement @ conf

        # Remaining frames: evolve with whatever model the profile prescribes
        for frame in range(1, n_frames):
            conf = self.models[profile[frame]].evolve(conf)
            data[frame, :] = self.measurement @ conf

        # Kick out frames that should be missing
        data[missing_frames, :] = np.nan

        # Add localization error and return as Trajectory
        noisy = data + localization_error[np.newaxis, :] * np.random.normal(
            size=data.shape)
        return Trajectory.fromArray(
            noisy,
            localization_error=localization_error,
            loopingprofile=profile,
        )
Beispiel #5
0
    def trajectory_from_loopingprofile(self, profile,
                                       localization_error=None,
                                       missing_frames=None,
                                       ):
        """
        Sample a trajectory for the given looping profile

        The input arguments are normalized by the parent implementation
        (called with ``preproc=...``); the trajectory itself is generated by
        starting from the steady state of the first model and evolving with
        the model prescribed by the profile for each subsequent frame.

        Parameters
        ----------
        profile : Loopingprofile
            the profile from whose associated ensemble to sample
        localization_error : float or (d,) np.ndarray, dtype=float
            see `MultiStateModel.trajectory_from_loopingprofile`. Falls back
            to ``self.localization_error`` if not given.
        missing_frames : None, float in [0, 1), int, or np.ndarray
            see `MultiStateModel.trajectory_from_loopingprofile`

        Returns
        -------
        Trajectory

        Raises
        ------
        ValueError
            if neither `localization_error` nor ``self.localization_error``
            is specified
        """
        # Resolve the localization error to use
        if localization_error is None:
            localization_error = self.localization_error
            if localization_error is None:
                raise ValueError("Need to specify either localization_error or model.localization_error") # pragma: no cover

        # Let the parent class bring both arguments into canonical form
        localization_error = super().trajectory_from_loopingprofile(
            profile,
            preproc='localization_error',
            localization_error=localization_error,
        )
        missing_frames = super().trajectory_from_loopingprofile(
            profile,
            preproc='missing_frames',
            missing_frames=missing_frames,
        )

        # Assemble trajectory
        n_frames = len(profile)
        data = np.full((n_frames, self.d), np.nan, dtype=float)

        # First frame is drawn from the steady state of the initial model
        conf = self.models[profile[0]].conf_ss()
        data[0, :] = self.measurement @ conf

        for frame in range(1, n_frames):
            conf = self.models[profile[frame]].evolve(conf)
            data[frame, :] = self.measurement @ conf

        # Kick out frames that should be missing
        data[missing_frames, :] = np.nan

        # Add localization error (missing frames stay nan)
        noise = np.random.normal(size=data.shape)
        data += localization_error[None, :] * noise

        return Trajectory.fromArray(data,
                                    localization_error=localization_error,
                                    loopingprofile=profile,
                                    )
Beispiel #6
0
    def trajectory_from_loopingprofile(self, profile,
                                       localization_error=0.,
                                       missing_frames=None):
        """
        Sample a trajectory for the given looping profile

        Parameters
        ----------
        profile : Loopingprofile
            the profile from whose associated ensemble to sample
        localization_error : float or (d,) np.ndarray, dtype=float
            see `MultiStateModel.trajectory_from_loopingprofile`. Note that
            this value is *not* added to the generated data, since the
            `distributions` of the model should already account for it. It
            only ends up in ``traj.meta['localization_error']``.
        missing_frames : None, float in [0, 1), int, or np.ndarray
            see `MultiStateModel.trajectory_from_loopingprofile`

        Returns
        -------
        Trajectory

        Notes
        -----
        A `FactorizedModel` knows only the distributions of the scalar
        distance between the points, i.e. it assumes that the distribution of
        distance vectors is isotropic. Accordingly, each frame is generated
        by drawing a magnitude from the distribution of the current state and
        an independent direction from the unit sphere.
        """
        # Let the parent class bring both arguments into canonical form
        localization_error = super().trajectory_from_loopingprofile(
            profile,
            preproc='localization_error',
            localization_error=localization_error,
        )
        missing_frames = super().trajectory_from_loopingprofile(
            profile,
            preproc='missing_frames',
            missing_frames=missing_frames,
        )

        # One distance per frame, drawn from the distribution of its state
        magnitudes = []
        for state in profile[:]:
            magnitudes.append(self.distributions[state].rvs())
        magnitudes = np.array(magnitudes)

        # Isotropic Gaussian vectors, rescaled to the sampled distances.
        # localization_error is deliberately not added here (it is already
        # part of self.distributions); it only goes into the meta entry.
        data = np.random.normal(size=(len(magnitudes), self.d))
        scale = magnitudes / np.linalg.norm(data, axis=1)
        data *= scale[:, np.newaxis]
        data[missing_frames, :] = np.nan

        return Trajectory.fromArray(data,
                                    localization_error=localization_error,
                                    loopingprofile=profile,
                                    )
Beispiel #7
0
    def trajectory_from_loopingtrace(self,
                                     loopingtrace,
                                     localization_error=0.,
                                     d=3):
        """
        Sample a trajectory for a given looping trace.

        Parameters
        ----------
        loopingtrace : Loopingtrace
            provides the total number of frames (``.T``), the frames that
            carry data (``.t``) and the state at each of those (``.state``)
        localization_error : scalar or (d,) array-like, optional
            *not* added to the sampled data (``self.distributions`` already
            contain it); only written to the meta data of the trajectory. A
            scalar is repeated for all `d` dimensions.
        d : int, optional
            number of spatial dimensions of the trajectory

        Returns
        -------
        Trajectory
            frames not listed in ``loopingtrace.t`` are nan

        Raises
        ------
        ValueError
            if `localization_error` is neither 0-dimensional nor of shape
            ``(d,)``
        """
        # 0-dimensional input is repeated along the spatial dimensions;
        # anything else has to be one value per dimension already (repeating
        # it would give a (d, d) array).
        if np.ndim(localization_error) == 0:
            localization_error = np.array(d * [localization_error])
        else:
            localization_error = np.asarray(localization_error)
            if localization_error.shape != (d, ):
                raise ValueError("Did not understand localization_error")

        # Note that the distributions in the model give us only the length, not
        #   the orientation. So we also have to sample unit vectors
        # Furthermore, localization_error should not be added, since
        #   self.distributions already contain it
        arr = np.full((loopingtrace.T, d), np.nan)
        magnitudes = np.array(
            [self.distributions[state].rvs() for state in loopingtrace.state])
        vectors = np.random.normal(size=(len(magnitudes), d))
        vectors *= np.expand_dims(magnitudes / np.linalg.norm(vectors, axis=1),
                                  1)
        arr[loopingtrace.t, :] = vectors

        return Trajectory.fromArray(
            arr,
            localization_error=localization_error,
            loopingtrace=loopingtrace,
        )
Beispiel #8
0
    def trajectory_from_loopingtrace(self,
                                     loopingtrace,
                                     localization_error=None,
                                     d=3):
        """
        Sample a trajectory for a given looping trace.

        Parameters
        ----------
        loopingtrace : Loopingtrace
            provides the total number of frames (``.T``) and the frames that
            carry data (``.t``); indexing it gives the state at each of those
            frames, which is used as index into ``self.models``
        localization_error : None, scalar, or (d,) array-like, optional
            standard deviation of the Gaussian noise added to each dimension.
            Scalars are repeated for all `d` dimensions. If ``None``,
            ``self.localization_error`` is used.
        d : int, optional
            number of spatial dimensions of the trajectory

        Returns
        -------
        Trajectory
            frames not listed in ``loopingtrace.t`` are nan

        Raises
        ------
        ValueError
            if no localization error is available, or if it does not have
            shape ``(d,)`` after broadcasting scalars
        """
        # Resolve and normalize the localization error
        if localization_error is None:
            localization_error = self.localization_error
            if localization_error is None:
                raise ValueError(
                    "Need to specify either localization_error or model.localization_error"
                )
        localization_error = np.asarray(
            d * [localization_error]
            if np.isscalar(localization_error) else localization_error)
        if localization_error.shape != (d, ):
            raise ValueError("Did not understand localization_error")

        # Frames without an entry in the looping trace stay nan
        arr = np.full((loopingtrace.T, d), np.nan)

        # First data point: steady state of the initial model
        model = self.models[loopingtrace[0]]
        conf = model.conf_ss(True, d)
        arr[loopingtrace.t[0], :] = model.measurement @ conf

        # Subsequent data points: evolve over the gap since the previous one
        for k in range(1, len(loopingtrace)):
            model = self.models[loopingtrace[k]]
            gap = loopingtrace.t[k] - loopingtrace.t[k - 1]
            conf = model.evolve(conf, True, gap)
            arr[loopingtrace.t[k], :] = model.measurement @ conf

        noise = np.random.normal(size=arr.shape)
        return Trajectory.fromArray(
            arr + localization_error[np.newaxis, :] * noise,
            localization_error=localization_error,
            loopingtrace=loopingtrace,
        )
Beispiel #9
0
def csv(filename,
        columns=['x', 'y', 't', 'id'],
        tags=None,
        meta_post={},
        **kwargs):
    """
    Load data from a .csv file.

    This uses ``np.genfromtxt``, and all kwargs are forwarded to it. By
    default, we assume the delimiter ``','`` and utf8 encoding for string data,
    but these can of course be changed. Refer to ``numpy.genfromtxt``.

    Parameters
    ----------
    filename : string or file-like object
        the file to be read
    columns : list
        how to interpret the columns in the file. Use any of these identifiers:
        ``{'x', 'y', 'z', 'x2', 'y2', 'z2', 't', 'id', None}``, where ``'t'``
        (mandatory) is the frame number, ``'id'`` (mandatory) the trajectory
        id, and the remaining ones can be used to indicate spatial components
        of single or double-locus trajectories. Use ``None`` to indicate a
        column that should be ignored.  Columns beyond the list given here will
        be ignored in any case. Finally, the data for any str identifier not
        matching one of the above will be written to a corresponding entry in
        the trajectory's `meta` dict. The list is only read, never modified.
    tags : str, list of str or set of str, optional
        the tag(s) to be associated with trajectories from this file
    meta_post : dict, optional
        post-processing options for the `meta` data. Keys should be `meta`
        field names, values can be "unique", "mean", or "nanmean". With
        "unique", all the values in the corresponding column should be the
        same, and only that value (instead of the whole array) will be written
        into the meta field. With "mean" we simply take the mean of the array;
        "nanmean" converts to float and takes the mean ignoring nan's. Meta
        columns without an entry here are assumed to contain floats and are
        stored as array over frames, with nan for missing frames. The dict is
        only read, never modified.

    Returns
    -------
    TaggedSet
        the loaded data set

    Raises
    ------
    AssertionError
        if `columns` lacks ``'id'`` or ``'t'``, or if the spatial identifiers
        are inconsistent (e.g. ``'y'`` without ``'x'``, or ``'x2'`` without
        ``'y2'`` when ``'y'`` is given)
    ValueError
        if `columns` contains no spatial identifier, if `columns` refers to
        more columns than the file contains, or for an invalid `meta_post`
        value
    RuntimeError
        if a meta column marked "unique" has more than one value within one
        trajectory

    Examples
    --------
    This function can be used to load data from ``pandas.DataFrame`` tables, if
    they conform to the format described above:

    >>> import io
    ... import pandas as pd
    ... import tracklib as tl
    ...
    ... # Set up a DataFrame containing some dummy data
    ... # Caveat to pay attention to: the order of the columns is important!
    ... df = pd.DataFrame()
    ... df['frame_no'] = [1, 2, 3]
    ... df['trajectory_id'] = [4, 4, 4]
    ... df['coord1'] = [1, 2, 3]
    ... df['coord2'] = [4, 5, 6]
    ...
    ... csv_stream = io.StringIO(df.to_csv())
    ... dataset = tl.io.load.csv(csv_stream,
    ...                          [None, 't', 'id', 'x', 'y'], # first column will be index
    ...                          delimiter=',',               # pandas' default
    ...                          skip_header=1,               # pandas prints a header line
    ...                         )
    """
    # Position of each named column in the file; None's (ignored columns) are
    # skipped. For duplicate identifiers the last occurrence wins.
    col_inds = {
        key: i
        for i, key in enumerate(columns) if isinstance(key, str)
    }

    keys = col_inds.keys()
    assert 'id' in keys, "columns has to contain 'id'"
    assert 't' in keys, "columns has to contain 't'"

    # Get shape of trajectory and check that the given keys make sense
    if 'z' in keys:
        d = 3
        assert 'y' in keys
        assert 'x' in keys
    elif 'y' in keys:
        d = 2
        assert 'x' in keys
    elif 'x' in keys:
        d = 1
    else:  # pragma: no cover
        raise ValueError(
            "No valid coordinates found in specification: {}".format(columns))

    # data_keys = ['x', 'x2', 'y', 'y2', ...]
    data_keys = sorted(keys & {'x', 'y', 'z', 'x2', 'y2', 'z2'})

    # Two-locus data is recognized by the spatial identifiers only. Meta
    # columns may have arbitrary names (including ones containing '2') and
    # must not influence this.
    N = 1
    if any('2' in key for key in data_keys):
        N = 2
        for key in data_keys:
            assert key[0] in data_keys
            assert key[0] + '2' in data_keys

    # Everything that is neither id, frame number, nor coordinate is meta data
    meta_keys = sorted(keys - {'id', 't', *data_keys})

    # Read data
    gft_kwargs = dict(delimiter=',', dtype=None, encoding='utf8')
    gft_kwargs.update(kwargs)
    data = np.genfromtxt(filename, **gft_kwargs)

    # Pull the relevant columns out of the parsed lines
    id_col = col_inds['id']
    t_col = col_inds['t']
    data_cols = [col_inds[key] for key in data_keys]
    meta_cols = [col_inds[key] for key in meta_keys]
    try:
        all_ids = np.array([line[id_col] for line in data])
        all_t = np.array([line[t_col] for line in data]).astype(int)
        all_coords = np.array([
            [line[col] for col in data_cols] for line in data
        ]).astype(float)  # shape: (-1, N*d), sorted x, x2, y, ...
        all_meta = [
            np.array([line[col] for line in data]) for col in meta_cols
        ]
    except IndexError as err:
        raise ValueError(
            "Too many columns for file. Did you use the right delimiter?"
        ) from err
    del data

    # Assemble data set
    out = TaggedSet()
    for myid in set(all_ids):
        ind = all_ids == myid

        # (frames, N*d) --> (frames, d, N) --> (N, frames, d)
        mydata = np.moveaxis(all_coords[ind].reshape((-1, d, N)), 2, 0)

        # Frame numbers relative to the first frame of this trajectory
        # (boolean indexing gives a copy, so all_t is not modified)
        myt = all_t[ind]
        myt -= np.min(myt)
        n_frames = np.max(myt) + 1

        # Frames without a line in the file stay nan
        trajdata = np.full((N, n_frames, d), np.nan, dtype=float)
        trajdata[:, myt, :] = mydata

        meta = {}
        for key, column in zip(meta_keys, all_meta):
            mymeta = column[ind]
            if key in meta_post:
                post = meta_post[key]
                if post == 'unique':
                    ms = set(mymeta)
                    if len(ms) > 1:
                        raise RuntimeError(
                            "Data in column '{}' is not unique for trajectory with id {}"
                            .format(key, myid))
                    meta[key] = ms.pop()
                elif post == 'mean':
                    meta[key] = np.mean(mymeta)
                elif post == 'nanmean':
                    meta[key] = np.nanmean(mymeta.astype(float))
                else:  # pragma: no cover
                    raise ValueError(f"invalid meta post-proc: {post}")
            else:  # assume that we have floats and fill with nan's
                meta[key] = np.full(n_frames, np.nan, dtype=float)
                meta[key][myt] = mymeta

        out.add(Trajectory.fromArray(trajdata, **meta), tags)

    return out