Example #1
def test_event_transfer():
    es = EventSegment(2)
    sample_data = np.asarray([[1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1, 1]])

    with pytest.raises(NotFittedError):  # Should need to set variance
        seg = es.find_events(sample_data.T)[0]

    with pytest.raises(NotFittedError):  # Should need to set patterns
        seg = es.find_events(sample_data.T, np.asarray([1, 1]))[0]

    es.set_event_patterns(np.asarray([[1, 0], [0, 1]]))
    seg = es.find_events(sample_data.T, np.asarray([1, 1]))[0]

    events = np.argmax(seg, axis=1)
    assert np.array_equal(events, [0, 0, 0, 1, 1, 1, 1]),\
        "Failed to correctly transfer two events to new data"
Example #2
def test_fit_shapes():
    K = 5
    V = 3
    T = 10
    es = EventSegment(K, n_iter=2)
    sample_data = np.random.rand(V, T)
    es.fit(sample_data.T)

    assert es.segments_[0].shape == (T, K), "Segmentation from fit " \
                                            "has incorrect shape"
    assert np.isclose(np.sum(es.segments_[0], axis=1), np.ones(T)).all(), \
        "Segmentation from learn_events not correctly normalized"

    T2 = 15
    sample_data2 = np.random.rand(V, T2)
    test_segments, test_ll = es.find_events(sample_data2.T)

    assert test_segments.shape == (T2, K), "Segmentation from find_events " \
                                           "has incorrect shape"
    assert np.isclose(np.sum(test_segments, axis=1), np.ones(T2)).all(), \
        "Segmentation from find_events not correctly normalized"

    es_invalid = EventSegment(K)
    with pytest.raises(ValueError):  # T < K should cause error
        es_invalid.model_prior(K-1)
    with pytest.raises(ValueError):  # #Events < K should cause error
        es_invalid.set_event_patterns(np.zeros((V, K-1)))
Example #5
def test_chains():
    es = EventSegment(5, event_chains=np.array(['A', 'A', 'B', 'B', 'B']))

    es.set_event_patterns(np.array([[1, 1, 0, 0, 0], [0, 0, 1, 1, 1]]))
    sample_data = np.array([[0, 0, 0], [1, 1, 1]])
    seg = es.find_events(sample_data.T, 0.1)[0]

    ev = np.nonzero(seg > 0.99)[1]
    assert np.array_equal(ev, [2, 3, 4]),\
        "Failed to fit with multiple chains"
Example #6
def test_sym_ll():
    ev = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2])
    random_state = np.random.RandomState(0)
    ev_pat = random_state.rand(3, 10)
    D_forward = np.zeros((len(ev), 10))
    for t in range(len(ev)):
        D_forward[t, :] = ev_pat[ev[t], :] + 0.1 * random_state.rand(10)
    D_backward = np.flip(D_forward, axis=0)

    hmm_forward = EventSegment(3)
    hmm_forward.set_event_patterns(ev_pat.T)
    _, ll_forward = hmm_forward.find_events(D_forward, var=1)

    hmm_backward = EventSegment(3)
    hmm_backward.set_event_patterns(np.flip(ev_pat.T, axis=1))
    _, ll_backward = hmm_backward.find_events(D_backward, var=1)

    assert (ll_forward == ll_backward),\
        "Log-likelihood not symmetric forward/backward"
Example #8
def test_sym():
    es = EventSegment(4)

    evpat = np.repeat(np.arange(10).reshape(-1, 1), 4, axis=1)
    es.set_event_patterns(evpat)

    D = np.repeat(np.arange(10).reshape(1, -1), 20, axis=0)
    ev = es.find_events(D, var=1)[0]

    # Check that events 1-4 and 2-3 are symmetric
    assert np.all(np.isclose(ev[:, :2], np.fliplr(np.flipud(ev[:, 2:])))),\
        "Fit with constant data is not symmetric"
Example #10
def heldout_ll(data, n_events, split):
    """Compute log-likelihood on heldout subjects

    Fits an event segmentation model with n_events to half of the subjects,
    then measures the log-likelihood of this model on the other half. The
    returned log-likelihood averages across both choices of which half is
    used for training and which is used for testing. The boolean array split
    defines which subjects are in each half.

    Parameters
    ----------
    data : ndarray
        subj x TR x Voxels data array
    n_events : int
        Number of events for event segmentation model
    split : ndarray
        Boolean vector: subjects in one group are True, those in the other are False

    Returns
    -------
    float
        Average of log-likelihoods on testing groups
    """

    d = deepcopy(data)

    # Remove nan voxels
    nan_idxs = np.where(np.isnan(d))
    nan_idxs = list(set(nan_idxs[2]))
    d = np.delete(np.asarray(d), nan_idxs, axis=2)

    # Train and test event segmentation across groups
    group1 = d[split].mean(0)
    group2 = d[~split].mean(0)
    es = EventSegment(n_events).fit(group1)
    _, ll12 = es.find_events(group2)
    es = EventSegment(n_events).fit(group2)
    _, ll21 = es.find_events(group1)

    return (ll12 + ll21) / 2
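
A minimal usage sketch (not from the original source): scan a few candidate event counts with heldout_ll, assuming `data` is a subject x TR x voxel array and that the subject count and candidate values below are placeholders.

import numpy as np

# Hypothetical example: 16 subjects, first half vs. second half
n_subj = 16
split = np.zeros(n_subj, dtype=bool)
split[:n_subj // 2] = True

# Try several event counts and keep the one with the highest average
# held-out log-likelihood (`data` is an assumed subj x TR x voxel array)
candidate_ks = [10, 20, 30, 40]
lls = [heldout_ll(data, k, split) for k in candidate_ks]
best_k = candidate_ks[int(np.argmax(lls))]
print('Best number of events:', best_k)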
Example #11
def test_chains():
    es = EventSegment(5, event_chains=np.array(['A', 'A', 'B', 'B', 'B']))
    sample_data = np.array([[0, 0, 0], [1, 1, 1]])

    with pytest.raises(RuntimeError):
        seg = es.fit(sample_data.T)[0]
        pytest.fail("Can't use fit() with event chains")

    es.set_event_patterns(np.array([[1, 1, 0, 0, 0], [0, 0, 1, 1, 1]]))
    seg = es.find_events(sample_data.T, 0.1)[0]

    ev = np.nonzero(seg > 0.99)[1]
    assert np.array_equal(ev, [2, 3, 4]),\
        "Failed to fit with multiple chains"
Example #12
def compute_fits_hmm(data: np.ndarray,
                     k: int,
                     mindist: int,
                     type='HMM',
                     y=None,
                     t1=None,
                     ind1=None,
                     zs=False):
    if type == 'HMM':
        hmm = HMM(k)
    elif type == 'HMMsplit':
        hmm = HMM(k, split_merge=True)

    if zs:
        data = zscore(data, axis=0, ddof=1)
    hmm.fit(data)

    if y is None:
        tdata = data
    else:
        if zs:
            y = zscore(y, axis=0, ddof=1)
        tdata = y

    _, LL_HMM = hmm.find_events(tdata)

    # Event boundaries: timepoints where the most likely event index increases
    hmm_bounds = np.insert(np.diff(np.argmax(hmm.segments_[0], axis=1)), 0,
                           0).astype(int)

    if t1 is None and ind1 is None:
        # Pairwise similarity between timepoint patterns, restricted to
        # pairs of timepoints at least mindist apart
        ind = np.triu(np.ones(tdata.shape[0], bool), mindist)
        z = GSBS._zscore(tdata)
        t = np.cov(z)[ind]
    else:
        ind = ind1
        t = t1

    # Label timepoint pairs by their state assignments: same state,
    # adjacent states (diff), or any two different states (alldiff)
    stateseq = deltas_states(deltas=hmm_bounds)[:, None]
    diff, same, alldiff = (lambda c: (c == 1, c == 0, c > 0))(cdist(
        stateseq, stateseq, "cityblock")[ind])
    # Within- minus across-state similarity, and a t-statistic contrasting
    # within-state pairs with adjacent-state pairs
    WAC_HMM = np.mean(t[same]) - np.mean(t[alldiff])
    tdist_HMM = 0 if sum(same) < 2 else ttest_ind(
        t[same], t[diff], equal_var=False)[0]

    return LL_HMM, WAC_HMM, tdist_HMM, hmm_bounds, t, ind
Example #13
    subj_id_train_inner = subj_id_all_inner[subj_id_train_inner]
    subj_id_test_inner = subj_id_all_inner[subj_id_test_inner]
    print("-Train:", subj_id_train_inner, "Validate:", subj_id_test_inner,
          ', now try different k...')

    D_train = D[:, :, subj_id_train_inner]  # 14 subjects to be trained
    D_val = D[:, :, subj_id_test_inner]  # 5 subjects for validation
    D_test = D[:, :, subj_id_test_outer]  # held-out subject
    for knum in range(len(ks)):  # loop through all possible K values
        # ev = brainiak.eventseg.event.EventSegment(ks[knum])  # set up HMM
        if splitm:  # if running split_merge
            ev = EventSegment(ks[knum], split_merge=True)
        else:  # if no split_merge
            ev = EventSegment(ks[knum])
        ev.fit(D_train.mean(2).T)  # fit to average pattern of training set
        # grab segments + log-likelihood on the average of the validation set
        segments, ll = ev.find_events(D_val.mean(2).T)
        # print('log likelihood for k=%s is %s' % (ks[knum], ll))  # uncomment to see ll values
        loglik[subj_id_test_outer, jj, knum] = ll  # store loglik value for this K
    all_ll[jj, :] = loglik[subj_id_test_outer, jj, :]
    best_ll[subj_id_test_outer, jj] = np.max(loglik[subj_id_test_outer, jj, :])  # best loglik!
    jj = jj + 1
mean_all_ll = np.mean(all_ll, axis=0)  # mean log-likelihood across inner folds
fill3 = mean_all_ll.argsort()  # sort K indices by log-likelihood value
fill3 = fill3[len(ks) - 1]  # index of the best (highest log-likelihood) K
print('Best K = %s' % ks[fill3])  # print best K
best_k_subj[subj_id_test_outer] = ks[fill3]  # store best k to assign for this subject
if splitm:  # if running split_merge
    ev = brainiak.eventseg.event.EventSegment(ks[fill3], split_merge=True)  # re-train model w/ this best K
else:  # if no split_merge
    ev = brainiak.eventseg.event.EventSegment(ks[fill3])  # re-train model w/ this best K
# ev.fit(D[:, :, np.arange(nS) != subj_id_test_outer].mean(2).T)  # fit
What if we don't want to prespecify the number of events, but instead want to determine it from the data? One way to choose the number of events is to fit the model on a set of training subjects for several candidate event counts and then measure the model's log-likelihood on held-out subjects.

k_array = np.arange(20, 61, 10)
test_ll = np.zeros(len(k_array))

# Average the training (first 8) and held-out subjects once, outside the loop
movie_train = np.mean(movie[:8], axis=0)
movie_test = np.mean(movie[8:], axis=0)

for i, k in enumerate(k_array):
    print('Trying %d events' % k)

    print('   Fitting model on training subjects...')
    movie_HMM = EventSegment(k)
    movie_HMM.fit(movie_train)

    print('   Testing model fit on held-out subjects...')
    _, test_ll[i] = movie_HMM.find_events(movie_test)

plt.plot(k_array, test_ll)
plt.xlabel('Number of events')
plt.ylabel('Log-likelihood')

movie_dur = nTRs * 1.5  # Data acquired every 1.5 seconds
secax = plt.gca().secondary_xaxis(
    'top', functions=(lambda x: movie_dur / x, lambda x: movie_dur / x))
secax.set_xlabel('Average event length (sec)')

#### 1.3 Optimal segmentation with the HMM
Since 40 events maximized the test log-likelihood, we'll generate two versions of HMM boundaries using 40 events. In addition to the "vanilla" HMM, we'll run an HMM with more flexibility during fitting (allowing for split-merge operations). This is slower (and so should usually only be used for generating a final segmentation), but can produce better fits if events are very uneven in duration. We will use these segmentations below for comparison with an alternative event segmentation method (GSBS) and with human-labeled event boundaries.

print('Fitting HMM with 40 events...')
HMM40 = EventSegment(n_events=40)
HMM40.fit(movie_group)
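
The split-merge variant mentioned above is fit the same way by passing split_merge=True (as in the earlier examples); a minimal sketch, where HMM40_SM is just a placeholder name:

print('Fitting HMM with 40 events (split-merge)...')
HMM40_SM = EventSegment(n_events=40, split_merge=True)  # hypothetical name; slower, but handles uneven event lengths
HMM40_SM.fit(movie_group)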