Ejemplo n.º 1
0
def test_fit_shapes():
    """Check shapes and normalization of fit() / find_events() output.

    Fits an ``EventSegment`` with K events on random (T, V) data and
    verifies that ``segments_[0]`` has shape (T, K) with every row summing
    to 1.  The same checks are applied to the segmentation returned by
    ``find_events`` on a second random dataset with T2 timepoints.
    Finally, ``model_prior`` called with K - 1 and ``set_event_patterns``
    called with only K - 1 patterns must both raise ``ValueError``.
    """
    K = 5
    V = 3
    T = 10
    es = EventSegment(K, n_iter=2)
    sample_data = np.random.rand(V, T)
    es.fit(sample_data.T)

    assert es.segments_[0].shape == (T, K), "Segmentation from fit " \
                                            "has incorrect shape"
    assert np.isclose(np.sum(es.segments_[0], axis=1), np.ones(T)).all(), \
        "Segmentation from fit not correctly normalized"

    T2 = 15
    sample_data2 = np.random.rand(V, T2)
    test_segments, test_ll = es.find_events(sample_data2.T)

    assert test_segments.shape == (T2, K), "Segmentation from find_events " \
                                           "has incorrect shape"
    assert np.isclose(np.sum(test_segments, axis=1), np.ones(T2)).all(), \
        "Segmentation from find_events not correctly normalized"

    es_invalid = EventSegment(K)
    # pytest.raises() no longer accepts `message=` (removed in pytest 5.0,
    # where passing it is a TypeError).  The documented replacement for a
    # custom failure message is an explicit pytest.fail() at the end of the
    # block, which only runs when the expected exception was NOT raised.
    with pytest.raises(ValueError):
        es_invalid.model_prior(K - 1)
        pytest.fail("T < K should cause error")
    with pytest.raises(ValueError):
        es_invalid.set_event_patterns(np.zeros((V, K - 1)))
        pytest.fail("#Events < K should cause error")
Ejemplo n.º 2
0
def test_fit_shapes():
    """Check shapes and normalization of fit() / find_events() output.

    Fits an ``EventSegment`` with K events on random (T, V) data and
    verifies that ``segments_[0]`` has shape (T, K) with every row summing
    to 1.  The same checks are applied to the segmentation returned by
    ``find_events`` on a second random dataset with T2 timepoints.
    Finally, ``model_prior`` called with K - 1 and ``set_event_patterns``
    called with only K - 1 patterns must both raise ``ValueError``.
    """
    K = 5
    V = 3
    T = 10
    es = EventSegment(K, n_iter=2)
    sample_data = np.random.rand(V, T)
    es.fit(sample_data.T)

    assert es.segments_[0].shape == (T, K), "Segmentation from fit " \
                                            "has incorrect shape"
    assert np.isclose(np.sum(es.segments_[0], axis=1), np.ones(T)).all(), \
        "Segmentation from fit not correctly normalized"

    T2 = 15
    sample_data2 = np.random.rand(V, T2)
    test_segments, test_ll = es.find_events(sample_data2.T)

    assert test_segments.shape == (T2, K), "Segmentation from find_events " \
                                           "has incorrect shape"
    assert np.isclose(np.sum(test_segments, axis=1), np.ones(T2)).all(), \
        "Segmentation from find_events not correctly normalized"

    es_invalid = EventSegment(K)
    # pytest.raises() no longer accepts `message=` (removed in pytest 5.0,
    # where passing it is a TypeError).  The documented replacement for a
    # custom failure message is an explicit pytest.fail() at the end of the
    # block, which only runs when the expected exception was NOT raised.
    with pytest.raises(ValueError):
        es_invalid.model_prior(K - 1)
        pytest.fail("T < K should cause error")
    with pytest.raises(ValueError):
        es_invalid.set_event_patterns(np.zeros((V, K - 1)))
        pytest.fail("#Events < K should cause error")
Ejemplo n.º 3
0
    def run_simulation_computation_time(self, nstates, rep):
        """Measure wall-clock fitting time of GSBS and HMM for k = 2..nstates-1.

        Simulated data is generated once via ``generate_simulated_data_HRF``
        and only its first slice (``subData[0, :, :]``) is fitted.

        Returns a dict of 1-D arrays of length ``nstates`` indexed by k
        (entries 0 and 1 are never written and stay 0):

        * ``duration_GSBS``: time spent in ``GSBS.fit()`` only; the
          constructor call is outside the timed region.
        * ``duration_HMM_fixK`` / ``duration_HMMsm_fixK``: time to construct
          and fit an HMM without / with ``split_merge``.
        * ``duration_HMM_estK`` / ``duration_HMMsm_estK``: cumulative sums
          of the corresponding ``*_fixK`` arrays.
        """
        _, subData, _ = self.generate_simulated_data_HRF(rep=rep)

        timings = dict()
        for name in ('duration_GSBS', 'duration_HMM_fixK',
                     'duration_HMMsm_fixK'):
            timings[name] = np.zeros([nstates])

        # (result key, split_merge flag) for the two timed HMM variants
        hmm_variants = (('duration_HMM_fixK', False),
                        ('duration_HMMsm_fixK', True))

        for k in range(2, nstates):
            print(rep, k)

            gsbs_model = gsbs_extra.GSBS(x=subData[0, :, :], kmax=k)
            started = timeit.default_timer()
            gsbs_model.fit()
            timings['duration_GSBS'][k] = timeit.default_timer() - started

            for name, use_split_merge in hmm_variants:
                started = timeit.default_timer()
                hmm_model = HMM(k, split_merge=use_split_merge)
                hmm_model.fit(subData[0, :, :])
                timings[name][k] = timeit.default_timer() - started

        timings['duration_HMM_estK'] = np.cumsum(
            timings['duration_HMM_fixK'])
        timings['duration_HMMsm_estK'] = np.cumsum(
            timings['duration_HMMsm_fixK'])

        return timings
Ejemplo n.º 4
0
def test_simple_boundary():
    """Fit a two-event model to noisy two-row data and check the boundary.

    The most likely event per timepoint (argmax over ``segments_[0]``) and
    the output of ``predict`` must both equal three 0s followed by four 1s.
    """
    es = EventSegment(2)
    rng = np.random.RandomState(0)

    expected_events = [0, 0, 0, 1, 1, 1, 1]
    base_pattern = np.array([[1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1, 1]])
    sample_data = base_pattern + rng.rand(2, 7) * 10
    data_t = sample_data.T

    es.fit(data_t)
    most_likely = np.argmax(es.segments_[0], axis=1)
    assert np.array_equal(most_likely, expected_events), \
        "Failed to correctly segment two events"

    predicted = es.predict(data_t)
    assert np.array_equal(predicted, expected_events), \
        "Error in predict interface"
Ejemplo n.º 5
0
def test_simple_boundary():
    """Fit a two-event model to noisy two-row data and check the boundary.

    The most likely event per timepoint (argmax over ``segments_[0]``) and
    the output of ``predict`` must both equal three 0s followed by four 1s.
    """
    es = EventSegment(2)
    rng = np.random.RandomState(0)

    expected_events = [0, 0, 0, 1, 1, 1, 1]
    base_pattern = np.array([[1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1, 1]])
    sample_data = base_pattern + rng.rand(2, 7) * 10
    data_t = sample_data.T

    es.fit(data_t)
    most_likely = np.argmax(es.segments_[0], axis=1)
    assert np.array_equal(most_likely, expected_events), \
        "Failed to correctly segment two events"

    predicted = es.predict(data_t)
    assert np.array_equal(predicted, expected_events), \
        "Error in predict interface"
Ejemplo n.º 6
0
def test_split_merge():
    """Check that split/merge fitting recovers events of very uneven length.

    Builds data whose rows are one of five random 10-dimensional patterns
    (selected by the label sequence ``ev``) plus small uniform noise, fits
    an ``EventSegment`` with ``split_merge=True`` and requires the most
    likely event at every timepoint to match ``ev`` exactly.
    """
    # Ground-truth event label for every timepoint
    ev = np.array([
        0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
    ])
    rng = np.random.RandomState(0)
    ev_pat = rng.rand(5, 10)

    # One noisy copy of the labelled pattern per timepoint; the noise is
    # drawn row by row so the random stream is consumed in timepoint order.
    D = np.array([ev_pat[label, :] + 0.1 * rng.rand(10) for label in ev])

    hmm_sm = EventSegment(5, split_merge=True, split_merge_proposals=2)
    hmm_sm.fit(D)
    recovered = np.argmax(hmm_sm.segments_[0], axis=1)

    assert np.all(ev == recovered),\
        "Merge/split fails to find highly uneven events"
Ejemplo n.º 7
0
def compute_fits_hmm(data: np.ndarray,
                     k: int,
                     mindist: int,
                     type='HMM',
                     y=None,
                     t1=None,
                     ind1=None,
                     zs=False):
    """Fit an HMM with ``k`` events to ``data`` and compute fit metrics.

    Parameters
    ----------
    data : np.ndarray
        Training data passed to ``HMM.fit``.
    k : int
        Number of events given to the ``HMM`` constructor.
    mindist : int
        Diagonal offset passed to ``np.triu`` when building the mask of
        timepoint pairs; only used when ``t1`` and ``ind1`` are both None.
    type : {'HMM', 'HMMsplit'}
        'HMMsplit' constructs the model with ``split_merge=True``.
    y : optional
        If given, the log-likelihood and the pairwise statistics are
        computed on ``y`` instead of ``data``.
    t1, ind1 : optional
        Precomputed pairwise values and pair mask.  They are used only when
        at least one of them is not None, in which case BOTH are taken as-is
        (so callers should supply them together).
    zs : bool
        If truthy, z-score ``data`` (and ``y`` when given) along axis 0 with
        ``ddof=1`` before use.

    Returns
    -------
    tuple
        ``(LL_HMM, WAC_HMM, tdist_HMM, hmm_bounds, t, ind)`` where
        ``LL_HMM`` is the second value returned by ``find_events``;
        ``WAC_HMM`` is mean(t) over same-state pairs minus mean(t) over
        different-state pairs; ``tdist_HMM`` is the unequal-variance t
        statistic between same-state pairs and pairs one state apart (0 if
        fewer than two same-state pairs); ``hmm_bounds`` is derived from the
        training segmentation ``segments_[0]``.

    Raises
    ------
    ValueError
        If ``type`` is not 'HMM' or 'HMMsplit'.
    """
    if type == 'HMM':
        hmm = HMM(k)
    elif type == 'HMMsplit':
        hmm = HMM(k, split_merge=True)
    else:
        # Previously an unknown type fell through and failed later with an
        # UnboundLocalError on `hmm`; fail early with a clear message.
        raise ValueError(
            "type must be 'HMM' or 'HMMsplit', got {!r}".format(type))

    if zs:
        data = zscore(data, axis=0, ddof=1)
    hmm.fit(data)

    if y is None:
        tdata = data
    else:
        if zs:
            y = zscore(y, axis=0, ddof=1)
        tdata = y

    _, LL_HMM = hmm.find_events(tdata)

    # Non-zero wherever the most likely event changes between consecutive
    # rows of the training segmentation; a 0 is prepended for the first row.
    hmm_bounds = np.insert(np.diff(np.argmax(hmm.segments_[0], axis=1)), 0,
                           0).astype(int)

    if t1 is None and ind1 is None:
        ind = np.triu(np.ones(tdata.shape[0], bool), mindist)
        z = GSBS._zscore(tdata)
        t = np.cov(z)[ind]
    else:
        ind = ind1
        t = t1

    # City-block distance between state labels of each selected pair:
    # 0 = same state, 1 = one state apart, > 0 = any different state.
    stateseq = deltas_states(deltas=hmm_bounds)[:, None]
    state_dist = cdist(stateseq, stateseq, "cityblock")[ind]
    diff = state_dist == 1
    same = state_dist == 0
    alldiff = state_dist > 0

    WAC_HMM = np.mean(t[same]) - np.mean(t[alldiff])
    if np.count_nonzero(same) < 2:
        tdist_HMM = 0
    else:
        tdist_HMM = ttest_ind(t[same], t[diff], equal_var=False)[0]

    return LL_HMM, WAC_HMM, tdist_HMM, hmm_bounds, t, ind
Ejemplo n.º 8
0
def test_chains():
    """Check EventSegment behaviour when ``event_chains`` is supplied.

    ``fit()`` must raise ``RuntimeError`` for a model built with event
    chains.  After setting event patterns explicitly, ``find_events`` (with
    second argument 0.1) must yield probabilities above 0.99 exactly at
    event indices 2, 3 and 4.
    """
    chain_labels = np.array(['A', 'A', 'B', 'B', 'B'])
    es = EventSegment(5, event_chains=chain_labels)
    sample_data = np.array([[0, 0, 0], [1, 1, 1]])
    data_t = sample_data.T

    with pytest.raises(RuntimeError):
        _ = es.fit(data_t)[0]
        # Only reached if fit() did not raise
        pytest.fail("Can't use fit() with event chains")

    event_patterns = np.array([[1, 1, 0, 0, 0], [0, 0, 1, 1, 1]])
    es.set_event_patterns(event_patterns)
    seg = es.find_events(data_t, 0.1)[0]

    # Second axis indices (events) of all entries that are near-certain
    confident_events = np.nonzero(seg > 0.99)[1]
    assert np.array_equal(confident_events, [2, 3, 4]),\
        "Failed to fit with multiple chains"
Ejemplo n.º 9
0
# Per-subject event segmentation with a fixed number of events.
# For every subject an EventSegment model is fitted, the timepoints where the
# most likely event changes are marked with 1 in bounds_subj_fixk, and a
# smoothed copy (via running_mean) is stored in bounds_subj_smooth_fixk.
# Depends on names defined outside this snippet: maxedges, bold_roi,
# task_name, nS, zzmat, splitm, running_mean, smoothf, smoothshift, ipynby, V.
K = maxedges # number of events given to EventSegment
T = bold_roi[task_name].shape[0] # size of the first axis of the ROI data (time points)
bounds_subj_fixk=np.zeros((nS,T))
bounds_subj_smooth_fixk=np.zeros((nS,T))
# NOTE(review): V is not assigned anywhere in this snippet; presumably it is
# defined earlier in the notebook (verify, otherwise this is a NameError).
print(V)
for subj in range(nS):
    # Select the zzmat entries of the second axis for this subject and
    # transpose, so the first axis of D is the zzmat-selected axis.
    D = bold_roi[task_name][:,zzmat,subj].T
    # Keep only rows of D whose value in column 1 is non-zero
    # (presumably drops empty voxels; confirm against the data source).
    zmat=D[:,1]!=0
    D=D[zmat,:]
    # Find the events in this dataset
    if splitm:
        hmm_sim = EventSegment(K,split_merge=True)
    else:
        hmm_sim = EventSegment(K)
        
    hmm_sim.fit(D.T)
    pred_seg = hmm_sim.segments_[0]
    # Extract the boundaries: indices i where the most likely event differs
    # between row i and row i + 1 of pred_seg.
    bs=np.where(np.diff(np.argmax(pred_seg, axis=1)))[0]
    bounds_subj_fixk[subj,bs] = 1 # mark the boundaries in the continuous space
    bounds_subj_smooth_fixk[subj,:]=running_mean(bounds_subj_fixk[subj,:],smoothf,smoothshift)
    if ipynby==1:
        if subj==0: # plot the data for sample subject
            # Raw data matrix of the first subject
            f, ax = plt.subplots(1,1, figsize=(6, 2))
            ax.imshow(D, interpolation='nearest', cmap='viridis', aspect='auto')
            ax.set_ylabel('Voxels')
            ax.set_xlabel('TRs')
            ax.set_title('Brain activity for sample subject')
            
            # Correlation between every pair of columns of D
            f, ax = plt.subplots(1,1, figsize = (10,8))
            ax.imshow(np.corrcoef(D.T), cmap='viridis')
Ejemplo n.º 10
0
    def run_simulation_evlength(self,
                                length_std,
                                nstates_list,
                                run_HMM,
                                rep,
                                TRfactor=1,
                                finetune=1):
        """Compare GSBS and HMM boundary recovery across event-length settings.

        For every combination of an entry of ``length_std`` and an entry
        ``n`` of ``nstates_list``, simulated data is generated with
        ``generate_simulated_data_HRF``, GSBS is fitted with ``kmax=n`` and
        its boundaries are scored against the simulated ones with
        ``fit_metrics_simulation``.  When ``run_HMM`` is exactly ``True``
        (identity check), HMMs without and with ``split_merge`` are fitted
        and scored as well.

        Returns a dict of arrays whose first two axes index ``length_std``
        and ``nstates_list``:

        * ``sim_*`` / ``simz_*``: first and second value returned by
          ``fit_metrics_simulation`` for GS, HMM and HMMsplit.
        * ``dists_*``: third returned value, written to the first ``n``
          slots of an axis of length ``nstates_list[-1]``.
        * ``statesreal``, ``bounds``, ``bounds_HMMsplit``: arrays with a
          last axis of length ``self.ntime`` holding ``deltas_states`` of
          the simulated boundaries, the GSBS ``bounds`` attribute and the
          split/merge HMM boundary vector.  The boundary vector of the
          plain HMM is not stored.
        """
        n_lengths = np.shape(length_std)[0]
        n_settings = np.shape(nstates_list)[0]

        res = dict()
        for key in ('dists_GS', 'dists_HMM', 'dists_HMMsplit'):
            res[key] = np.zeros([n_lengths, n_settings, nstates_list[-1]])
        for key in ('sim_GS', 'sim_HMM', 'sim_HMMsplit', 'simz_GS',
                    'simz_HMM', 'simz_HMMsplit'):
            res[key] = np.zeros([n_lengths, n_settings])
        for key in ('statesreal', 'bounds', 'bounds_HMMsplit'):
            res[key] = np.zeros([n_lengths, n_settings, self.ntime])

        # (split_merge flag, sim key, simz key, dists key) per HMM variant
        hmm_variants = (
            (False, 'sim_HMM', 'simz_HMM', 'dists_HMM'),
            (True, 'sim_HMMsplit', 'simz_HMMsplit', 'dists_HMMsplit'),
        )

        for i_len, std in enumerate(length_std):
            for i_set, n in enumerate(nstates_list):
                print(rep, std)
                bounds, subData, _ = self.generate_simulated_data_HRF(
                    length_std=std, nstates=n, TRfactor=TRfactor, rep=rep)
                res['statesreal'][i_len, i_set, :] = deltas_states(bounds)

                states = gsbs_extra.GSBS(kmax=n,
                                         x=subData[0, :, :],
                                         finetune=finetune)
                states.fit()
                gsbs_bounds = np.double(states.get_bounds(k=n) > 0)
                sim, simz, dists = fit_metrics_simulation(bounds, gsbs_bounds)
                res['sim_GS'][i_len, i_set] = sim
                res['simz_GS'][i_len, i_set] = simz
                res['dists_GS'][i_len, i_set, 0:n] = dists
                res['bounds'][i_len, i_set, :] = states.bounds

                if run_HMM is True:
                    # Fit both variants first, then score them, keeping the
                    # same order of model fits as before.
                    fitted_bounds = []
                    for use_split_merge, _, _, _ in hmm_variants:
                        ev = HMM(n, split_merge=use_split_merge)
                        ev.fit(subData[0, :, :])
                        event_idx = np.argmax(ev.segments_[0], axis=1)
                        fitted_bounds.append(
                            np.insert(np.diff(event_idx), 0, 0).astype(int))

                    for variant, variant_bounds in zip(hmm_variants,
                                                       fitted_bounds):
                        _, sim_key, simz_key, dists_key = variant
                        sim, simz, dists = fit_metrics_simulation(
                            bounds, variant_bounds)
                        res[sim_key][i_len, i_set] = sim
                        res[simz_key][i_len, i_set] = simz
                        res[dists_key][i_len, i_set, 0:n] = dists

                    res['bounds_HMMsplit'][i_len, i_set, :] = fitted_bounds[1]

        return res
Before applying any model, a good first step is to plot the correlation between activity patterns for each pair of timepoints during the movie. In this dataset, the plot shows blocks along the diagonal, which indicates that activity patterns remain stable for periods of tens of timepoints. This is the kind of structure that the HMM and GSBS models will be looking for.

# Show the correlation matrix between every pair of rows of movie_group
# (rows are treated as timepoints, as the axis labels indicate).
plt.figure(figsize=(10,8))
plt.imshow(np.corrcoef(movie_group))
plt.xlabel('Timepoint')
plt.ylabel('Timepoint')
plt.colorbar()
# The trailing ';' suppresses the echoed return value in a notebook cell.
plt.title('Spatial pattern correlation');

#### 1.1 Fitting the HMM

To use an HMM to find both the event timings and the patterns corresponding to each event, we can use the EventSegment class from the brainiak toolbox. We need to specify the number of events, which here we set to 29 (corresponding to the number of boundaries typically annotated by human subjects).

# Build an event-segmentation model with 29 events and fit it to movie_group.
movie_HMM = EventSegment(n_events = 29)
# The trailing ';' suppresses the echoed return value in a notebook cell.
movie_HMM.fit(movie_group);

This fit produces:
* The log-likelihood (measuring overall model fit) over training. (Note that the log-likelihood on held-out test data is often a better measure of model quality - see below).
* The mean voxel pattern for each event. Here we show only 1% of the voxels since the ROI is large.
* A matrix showing the probability of being in each event at each timepoint. We can use this to derive the most likely timepoints where boundaries occur, and plot these on top of the timepoint similarity matrix for comparison.

# Plotting the log-likelihood (measuring overall model fit)
# movie_HMM.ll_ is drawn against its index, one point per fitting step.
plt.figure(figsize = (12, 4))
plt.plot(movie_HMM.ll_)
plt.title('Log likelihood during training')
plt.xlabel('Model fitting steps')

# Plotting mean activity in each event for some example voxels
plt.figure(figsize = (12, 4))
example_vox = np.arange(0,movie_HMM.event_pat_.shape[0],100)