Example #1
def transfer_info(leader, follower, timescale):
    """
    Wrapper for computing transfer entropy between two trajectories using a binary
    representation of their velocity changes.
    2016-11-09

    Params:
    -------
    leader (vector)
    follower (vector)
    timescale (int)
        Lag given in units of the number of entries to skip in the sample.
    """
    from entropy.transfer import TransferEntropy
    from misc.utils import unique_rows  # same helper as in Example #2
    te = TransferEntropy()

    # Discretize each trajectory's changes over the given timescale.
    lchange = discrete_vel(leader, timescale)
    fchange = discrete_vel(follower, timescale)

    # Map each (possibly vector-valued) state onto a unique integer label.
    if lchange.ndim > 1:
        lchange = unique_rows(lchange, return_inverse=True)
    else:
        lchange = unique_rows(lchange[:, None], return_inverse=True)
    if fchange.ndim > 1:
        fchange = unique_rows(fchange, return_inverse=True)
    else:
        fchange = unique_rows(fchange[:, None], return_inverse=True)

    # Transfer entropy in both directions.
    ltofinfo = te.n_step_transfer_entropy(lchange, fchange, discretize=False)
    ftolinfo = te.n_step_transfer_entropy(fchange, lchange, discretize=False)
    return ltofinfo, ftolinfo
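
A minimal usage sketch (hypothetical data; assumes discrete_vel and the imports above resolve on your path):

import numpy as np
leader = np.random.randn(1000).cumsum()                    # illustrative trajectory
follower = np.roll(leader, 5) + 0.1*np.random.randn(1000)  # trails the leader by 5 steps
ltof, ftol = transfer_info(leader, follower, 5)
print(ltof, ftol)  # ltof should tend to exceed ftol since follower trails leader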
Example #2
    def zipf_law(X):
        """Return frequency rank of states.
        
        Parameters
        ----------
        X : ndarray
            (n_samples,n_dim)

        Returns
        -------
        uniqX : ndarray
            Unique states.
        uniqIx : ndarray
            Indices into X of each unique state, i.e. X[uniqIx] recovers uniqX.
        p : ndarray
            Probability of each unique state.
        """
        from misc.utils import unique_rows

        # Collect unique states.
        uniqIx = unique_rows(X)
        uniqX = X[uniqIx]

        # Count occurrences of each unique state; bincount's order matches uniqX.
        p = np.bincount(unique_rows(X, return_inverse=True))
        p = p / p.sum()

        # Sort everything by the probability.
        sortIx = np.argsort(p)[::-1]
        p = p[sortIx]
        uniqIx = uniqIx[sortIx]
        uniqX = uniqX[sortIx]

        return uniqX, uniqIx, p
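
A quick sketch of calling it (assumes zipf_law is reachable at module scope and numpy is imported as np):

import numpy as np
X = np.random.randint(0, 3, size=(1000, 2))  # up to 9 distinct joint states
uniqX, uniqIx, p = zipf_law(X)
assert np.isclose(p.sum(), 1)
assert (p[:-1] >= p[1:]).all()  # frequencies come back sorted, most probable first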
Example #3
def preprocess_average_repeat_values(X, Y):
    """For any repeated data points in X, take the average of the measured values Y.
    
    Parameters
    ----------
    X : ndarray
    Y : ndarray
    
    Returns
    -------
    Xsquished : ndarray
        X with repeated rows removed.
    Ysquished : ndarray
        Mean of Y over each set of repeated rows.
    """
    # Keep one copy of each unique row of X.
    Xsquished = X[unique_rows(X)]
    Ysquished = np.zeros(len(Xsquished))

    # Average Y over all rows of X that match each unique row.
    for i, row in enumerate(Xsquished):
        Ysquished[i] = Y[(row[None, :] == X).all(1)].mean()

    return Xsquished, Ysquished
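
For example, with a small illustrative array (the repeated row [0, 1] gets its Y values averaged):

import numpy as np
X = np.array([[0, 1], [0, 1], [2, 3]])
Y = np.array([1.0, 3.0, 5.0])
Xu, Yu = preprocess_average_repeat_values(X, Y)
# Yu holds 2.0 for the deduplicated [0, 1] row and 5.0 for [2, 3]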
Example #4
    def n_step_transfer_entropy(self,
                                x,
                                y,
                                kPast=1,
                                kPastOther=1,
                                kFuture=1,
                                bins=[10, 10, 10],
                                discretize=True,
                                returnProbabilities=False):
        """
        Transfer entropy from x->y 
        Using histogram binning for unidimensional data and k-means clustering for k-dimensional data where
        input data points are a set of points from a trajectory.

        Compute n step transfer entropy by summing the entropies when the transfer entropy is rewritten.

        Note:
        Random seeds with k-means clustering might affect the computed results. Good idea to try several
        iterations or many different k-means seeds.

        2016-11-09

        Params:
        -------
        x,y
            (n_samples,n_dim)
        kPast (int)
            k steps into the past
        kPastOther (int)
            k steps into the past for other trajectory that we're conditioning on 
        kFuture(int)
            k steps into the future
        [binsPast,binsOtherPast,binsFuture] (list of ints)
            number of bins (or clusters) for trajectories
        discretize (bool=True)
            Whether or not to discretize the data.
        returnProbabilities (False, bool)
        """
        # discreteFuture, discretePast, discreteOtherPast : 1d vectors labeling sets of trajectories
        kPastMx = max(kPast, kPastOther)

        # Construct matrix of data points (i_{n+1},i_n,j_n) where i and j are vectors.
        future = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kFuture))
        past = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kPast))
        otherPast = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kPastOther))
        for i in range(future.shape[0]):
            future[i, :] = y[(i + kPastMx):(i + kPastMx + kFuture)]
            past[i, :] = y[(i + kPastMx - kPast):(i + kPastMx)]
            otherPast[i, :] = x[(i + kPastMx - kPastOther):(i + kPastMx)]

        if discretize:
            discreteFuture = self.digitize_vector_or_scalar(future, bins[2])
            discretePast = self.digitize_vector_or_scalar(past, bins[0])
            discreteOtherPast = self.digitize_vector_or_scalar(otherPast, bins[1])
        else:
            # Data already discrete: unique_rows(..., return_inverse=True) assigns an
            # integer label to each distinct row.
            discreteFuture = unique_rows(future, return_inverse=True)
            discretePast = unique_rows(past, return_inverse=True)
            discreteOtherPast = unique_rows(otherPast, return_inverse=True)

        # Marginal distributions.
        # Compute p(i_{n+1},i_n,j_n): data as row vectors arranged into a matrix, with each
        # unique entry assigned its empirical probability.
        xy = np.c_[discreteFuture, discretePast, discreteOtherPast]
        uniqxy = xy[unique_rows(xy)]
        pXXkY = np.zeros(uniqxy.shape[0])
        for i, row in enumerate(uniqxy):
            pXXkY[i] = np.sum(np.all(row[None, :] == xy, 1))
        pXXkY /= np.sum(pXXkY)

        # p(i_n): marginal over the target's past.
        Xk = np.bincount(discretePast)
        pXk = Xk / np.sum(Xk)

        # p(i_n,j_n): joint distribution of both pasts.
        YXk = np.c_[discretePast, discreteOtherPast]
        uniqYXk = YXk[unique_rows(YXk)]
        pYXk = np.zeros(uniqYXk.shape[0])
        for i, r in enumerate(uniqYXk):
            pYXk[i] = np.sum(np.all(r[None, :] == YXk, 1))
        pYXk = pYXk / np.sum(pYXk)

        # p(i_{n+1},i_n): joint distribution of the target's future and past.
        XXk = np.c_[discreteFuture, discretePast]
        uniqXXk = XXk[unique_rows(XXk)]
        pXXk = np.zeros(uniqXXk.shape[0])
        for i, r in enumerate(uniqXXk):
            pXXk[i] = np.sum(np.all(r[None, :] == XXk, 1))
        pXXk = pXXk / np.sum(pXXk)

        # Since np.nansum(p*log2(p)) = -H(p), this computes
        # T_{x->y} = H(i_{n+1},i_n) + H(i_n,j_n) - H(i_n) - H(i_{n+1},i_n,j_n).
        transferEntropy = (np.nansum(pXXkY * np.log2(pXXkY)) +
                           np.nansum(pXk * np.log2(pXk)) -
                           np.nansum(pYXk * np.log2(pYXk)) -
                           np.nansum(pXXk * np.log2(pXXk)))
        if returnProbabilities:
            # Optionally return the estimated distributions, matching the
            # returnProbabilities flag of _n_step_transfer_entropy.
            return transferEntropy, pXXkY, pXk, pYXk, pXXk
        return transferEntropy
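
A hedged usage sketch (assumes the TransferEntropy import path from Example #1 and 1-d trajectories):

import numpy as np
from entropy.transfer import TransferEntropy

te = TransferEntropy()
x = np.random.randn(500).cumsum()
y = 0.8*np.roll(x, 1) + 0.2*np.random.randn(500)  # y is driven by x's recent past
print(te.n_step_transfer_entropy(x, y))  # should tend to exceed the reverse direction
print(te.n_step_transfer_entropy(y, x))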
Example #5
    def _n_step_transfer_entropy(self,
                                 x,
                                 y,
                                 kPast=1,
                                 kPastOther=1,
                                 kFuture=1,
                                 bins=[10, 10, 10],
                                 returnProbabilities=False):
        """
        Transfer entropy from x->y 
        Using histogram binning for unidimensional data and k-means clustering for k-dimensional data where
        input data points are a set of points from a trajectory.

        We compute the empirical distribution p(i_{n+1},i_n,j_n) and marginalize over this to get the
        conditional probabilities required for transfer entropy calculation.

        NOTE:
        Random seeds with k-means clustering might affect the computed results. Good idea to try several
        iterations or many different k-means seeds.

        2015-12-23

        Params:
        x
            (n_samples,n_dim)
        y
        kPast (int)
            k steps into the past
        kPastOther (int)
            k steps into the past for other trajectory that we're conditioning on 
        kFuture(int)
            k steps into the future
        [binsPast,binsOtherPast,binsFuture] (list of ints)
            number of bins (or clusters) for trajectories
        returnProbabilities (False, bool)
        """
        kPastMx = max(kPast, kPastOther)

        # Construct matrix of data points (i_{n+1},i_n,j_n) where i and j are vectors.
        future = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kFuture))
        past = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kPast))
        otherPast = np.zeros((x.shape[0] - kPastMx - kFuture + 1, kPastOther))
        for i in range(future.shape[0]):
            future[i, :] = y[(i + kPastMx):(i + kPastMx + kFuture)]
            past[i, :] = y[(i + kPastMx - kPast):(i + kPastMx)]
            otherPast[i, :] = x[(i + kPastMx - kPastOther):(i + kPastMx)]

        discreteFuture = self.digitize_vector_or_scalar(future, bins[2])
        discretePast = self.digitize_vector_or_scalar(past, bins[0])
        discreteOtherPast = self.digitize_vector_or_scalar(otherPast, bins[1])

        # Data as row vectors arranged into a matrix; unique entries get empirical probabilities.
        xy = np.c_[discreteFuture, discretePast, discreteOtherPast]
        uniqxy = xy[unique_rows(xy)]
        pijk = np.zeros(uniqxy.shape[0])

        # Compute p(i_{n+1},i_n,j_n) by counting occurrences of each unique row.
        for i, row in enumerate(uniqxy):
            pijk[i] = np.sum(np.all(row[None, :] == xy, 1))
        pijk /= np.sum(pijk)

        # Define functions for multiprocessing. ------------------------------------------------
        def calc_piCondij(i, row, store):
            """p( i_future | i_past, j_past )"""
            # Columns of uniqxy are [future (kFuture), past (kPast), otherPast (kPastOther)];
            # condition on every column after the future block.
            ix = np.where(np.all(row[None, kFuture:] == uniqxy[:, kFuture:], 1))[0]
            p = pijk[ix]
            p /= np.sum(p)

            piCondij = np.sum(p[np.all(uniqxy[ix][:, :kFuture] == row[None, :kFuture], 1)])

            # Store result in shared memory.
            store[i] = piCondij

        def calc_piCondi(i, row, store):
            """p( i_future | i_past )"""
            # Condition only on the target's own past block.
            ix = np.where(np.all(row[None, kFuture:kFuture + kPast] ==
                                 uniqxy[:, kFuture:kFuture + kPast], 1))[0]
            p = pijk[ix]
            p /= np.sum(p)

            piCondi = np.sum(p[np.all(uniqxy[ix][:, :kFuture] == row[None, :kFuture], 1)])

            # Store result in shared memory.
            store[i] = piCondi

        # Parallelization steps:
        # 1. Create shared memory for storing results across independent processes.
        # 2. Generate the list of tasks and put them in a Queue. The queue must hold one
        #    sentinel per worker (or the workers block indefinitely).
        # 3. Generate workers and start them.
        # 4. End workers.

        # Define workers that will take jobs from queue to complete.
        def worker_piCondij(work_queue, storage):
            while True:
                nextTask = work_queue.get()
                if nextTask is not None:
                    nextTask.append(storage)
                    calc_piCondij(*nextTask)
                else:
                    break

        def worker_piCondi(work_queue, storage):
            while True:
                nextTask = work_queue.get()
                if nextTask is not None:
                    nextTask.append(storage)
                    calc_piCondi(*nextTask)
                else:
                    break

        def generate_work_queue():
            # Iterate over all unique data points.
            workQueue = Queue()  # List of jobs to complete.
            for i, row in enumerate(uniqxy):
                workQueue.put([i, row])
            # Place one sentinel for each worker so it stops waiting for the queue to fill.
            for i in range(self.N_WORKERS):
                workQueue.put(None)
            return workQueue

        # Shared-memory arrays (multiprocessing.Array) that collect results from the workers;
        # 'd' buffers of this length are zero-initialized.
        piCondijStore = Array('d', uniqxy.shape[0])
        piCondiStore = Array('d', uniqxy.shape[0])

        self.run_parallel_job(worker_piCondij, generate_work_queue(),
                              piCondijStore)
        self.run_parallel_job(worker_piCondi, generate_work_queue(),
                              piCondiStore)

        piCondij = np.array(piCondijStore[:])
        piCondi = np.array(piCondiStore[:])

        # T_{x->y} = sum p(i_{n+1},i_n,j_n) * log2[ p(i_{n+1}|i_n,j_n) / p(i_{n+1}|i_n) ]
        transferEntropy = np.nansum(pijk *
                                    (np.log2(piCondij) - np.log2(piCondi)))
        if returnProbabilities:
            return transferEntropy, pijk, piCondij, piCondi
        return transferEntropy
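
This variant parallelizes the per-state conditional-probability sums. run_parallel_job and N_WORKERS are attributes of the class that are not shown here; a plausible sketch of what run_parallel_job does, under that assumption:

from multiprocessing import Process

def run_parallel_job(self, worker, work_queue, storage):
    # Hypothetical: start N_WORKERS processes draining the same queue;
    # each sentinel (None) placed in the queue stops one worker.
    procs = [Process(target=worker, args=(work_queue, storage))
             for _ in range(self.N_WORKERS)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()

Note that handing locally defined worker closures to Process only works with the fork start method (the Unix default); under spawn they are not picklable.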