def dummy_test(infile, expfile):
    """Run ``f_test`` on pickled inputs and compare with the pickled expectation.

    Parameters
    ----------
    infile : path-like
        Pickle file holding a dict of SLM attributes. Entries whose key
        contains "1" are assigned to the first model, entries whose key
        contains "2" to the second; the first four characters of the key are
        dropped to obtain the attribute name.
    expfile : path-like
        Pickle file holding a dict that maps attribute names to the values
        expected on the ``f_test`` result.

    Raises
    ------
    AssertionError
        If any expected attribute is not close to the computed one.
    """
    # Context managers close the files even when unpickling raises.
    with open(infile, "br") as ifile:
        idic = pickle.load(ifile)

    slm1 = SLM(FixedEffect(1), FixedEffect(1))
    slm2 = SLM(FixedEffect(1), FixedEffect(2))
    for key, value in idic.items():
        if "1" in key:
            setattr(slm1, key[4:], value)
        elif "2" in key:
            setattr(slm2, key[4:], value)

    # run f test
    outdic = f_test(slm1, slm2)

    # load expected output data
    with open(expfile, "br") as efile:
        expdic = pickle.load(efile)

    testout = [
        np.allclose(getattr(outdic, key), expected, rtol=1e-05, equal_nan=True)
        for key, expected in expdic.items()
    ]
    assert all(testout)
def create_parameter_grid(samples, predictors):
    """Creates a parameter grid for the test function.

    Parameters
    ----------
    samples : int
        Number of rows of the random design matrix and length of the random
        contrasts.
    predictors : int
        Number of random predictor columns in the model.

    Returns
    -------
    ParameterGrid
        All pairings of parameters to be run through the SLM class.
    """
    # One name per predictor column ("y1", "y2", ...). The names used to be a
    # fixed list of three, which only matched predictors == 3.
    predictor_names = ["y" + str(i + 1) for i in range(predictors)]
    model = [
        FixedEffect(1)
        + FixedEffect(np.random.rand(samples, predictors), names=predictor_names)
    ]

    Y_idx = [1, 2, 3]
    contrast = [np.random.rand(samples), FixedEffect(np.random.rand(samples))]
    surf = [None, read_surface_gz(fetch_surf_fsaverage()["pial_left"])]
    # NOTE(review): 10242 is presumably the vertex count of the fetched
    # surface - confirm.
    mask = [None, np.random.rand(10242) > 0.1]
    correction = [None, ["rft", "fdr"]]
    two_tailed = [False, True]

    param_grid = ParameterGrid(
        {
            "Y_idx": Y_idx,
            "model": model,
            "contrast": contrast,
            "surf": surf,
            "mask": mask,
            "correction": correction,
            "two_tailed": two_tailed,
        }
    )
    return param_grid
def dummy_test(infile, expfile):
    """Run ``compute_resels`` on pickled inputs and compare with the expectation.

    Parameters
    ----------
    infile : path-like
        Pickle file holding a dict of attributes that are set on the SLM.
    expfile : path-like
        Pickle file holding a dict with the expected "resels", "reselspvert"
        and "edg" values.

    Raises
    ------
    AssertionError
        If any output that is not None on both sides differs from its
        expected value.
    """
    # Context managers close the files even when unpickling raises.
    with open(infile, "br") as ifile:
        idic = pickle.load(ifile)

    slm = SLM(FixedEffect(1), FixedEffect(1))
    for key, value in idic.items():
        setattr(slm, key, value)

    resels_py, reselspvert_py, edg_py = compute_resels(slm)
    out = {"resels": resels_py, "reselspvert": reselspvert_py, "edg": edg_py}

    # load expected output data
    with open(expfile, "br") as efile:
        expdic = pickle.load(efile)

    # Outputs that are None on either side are skipped rather than compared.
    testout = [
        np.allclose(value, expdic[key], rtol=1e-05, equal_nan=True)
        for key, value in out.items()
        if value is not None and expdic[key] is not None
    ]
    assert all(testout)
def dummy_test(infile, expfile):
    """Run ``t_test`` on pickled inputs and compare with the pickled expectation.

    Parameters
    ----------
    infile : path-like
        Pickle file holding a dict of attributes that are set on the SLM.
    expfile : path-like
        Pickle file holding a dict that maps attribute names to the values
        expected on the SLM after ``t_test`` has run.

    Raises
    ------
    AssertionError
        If any expected attribute is not close to the computed one.
    """
    # Context managers close the files even when unpickling raises.
    with open(infile, "br") as ifile:
        idic = pickle.load(ifile)

    slm = SLM(FixedEffect(1), FixedEffect(1))
    for key, value in idic.items():
        setattr(slm, key, value)

    # run t_test; the results are read back from the SLM's attributes
    t_test(slm)

    # load expected output data
    with open(expfile, "br") as efile:
        expdic = pickle.load(efile)

    testout = [
        np.allclose(getattr(slm, key), expected, rtol=1e-05, equal_nan=True)
        for key, expected in expdic.items()
    ]
    assert all(testout)
def dummy_test(infile, expfile):
    """Run ``peak_clus`` on pickled inputs and compare with the expectation.

    Parameters
    ----------
    infile : path-like
        Pickle file holding a dict with the SLM attributes "t", "tri", "mask",
        "df", "k" and "resl" plus the "thresh", "reselspvert" and "edg"
        arguments of ``peak_clus``.
    expfile : path-like
        Pickle file holding a dict with the expected "peak", "clus" and
        "clusid" outputs.

    Raises
    ------
    AssertionError
        If any of the three outputs differs from its expected value.
    """
    # Context managers close the files even when unpickling raises.
    with open(infile, "br") as ifile:
        idic = pickle.load(ifile)

    slm = SLM(FixedEffect(1), FixedEffect(1))
    slm.t = idic["t"]
    slm.tri = idic["tri"]
    slm.mask = idic["mask"]
    slm.df = idic["df"]
    slm.k = idic["k"]
    slm.resl = idic["resl"]

    thresh = idic["thresh"]
    reselspvert = idic["reselspvert"]
    edg = idic["edg"]

    # call python function
    P_peak, P_clus, P_clusid = peak_clus(slm, thresh, reselspvert, edg)

    # load expected output data
    with open(expfile, "br") as efile:
        expdic = pickle.load(efile)

    O_peak = expdic["peak"]
    O_clus = expdic["clus"]
    O_clusid = expdic["clusid"]

    # Record every comparison. Before, the non-dict peak result was never
    # appended and the per-key cluster results overwrote one another, so
    # mismatches there could go unnoticed.
    testout = []
    for actual, expected in ((P_peak, O_peak), (P_clus, O_clus)):
        if isinstance(actual, dict):
            for key in actual:
                testout.append(
                    np.allclose(
                        actual[key], expected[key], rtol=1e-05, equal_nan=True
                    )
                )
        else:
            testout.append(
                np.allclose(actual, expected, rtol=1e-05, equal_nan=True)
            )

    testout.append(np.allclose(P_clusid, O_clusid, rtol=1e-05, equal_nan=True))

    assert all(testout)
def generate_random_two_slms(I):
    """Build two SLMs and distribute the entries of ``I`` over them.

    Parameters
    ----------
    I : dict
        Entries whose key contains "1" go to the first SLM; otherwise entries
        whose key contains "2" go to the second. The attribute name is the key
        without its first four characters. Other entries are ignored.

    Returns
    -------
    tuple
        The two populated SLM objects ``(slm1, slm2)``.
    """
    first = SLM(FixedEffect(1), FixedEffect(1))
    second = SLM(FixedEffect(1), FixedEffect(2))

    for name, value in I.items():
        # "1" takes precedence when a key contains both digits.
        if "1" in name:
            target = first
        elif "2" in name:
            target = second
        else:
            continue
        setattr(target, name[4:], value)

    return first, second
def generate_t_test_out(I):
    """Run ``t_test`` on an SLM populated from ``I`` and collect its results.

    Parameters
    ----------
    I : dict
        Attribute names and values to set on the SLM before the test runs.

    Returns
    -------
    dict
        The SLM attributes "X", "df", "coef", "SSE", "c", "k", "ef", "sd" and
        "t" as they are after ``t_test``.
    """
    model = SLM(FixedEffect(1), FixedEffect(1))
    for name, value in I.items():
        setattr(model, name, value)

    # run t_test
    t_test(model)

    wanted = ("X", "df", "coef", "SSE", "c", "k", "ef", "sd", "t")
    return {name: getattr(model, name) for name in wanted}
def dummy_test(infile, expfile, simple=True):
    """Run ``SLM.linear_model`` on pickled inputs and compare with the expectation.

    Parameters
    ----------
    infile : path-like
        Pickle file holding a dict with the data "Y", the model "M" and
        optionally "tri" or "lat".
    expfile : path-like
        Pickle file holding a dict that maps attribute names to the values
        expected on the SLM after fitting.
    simple : bool, optional
        Not used by this function; kept for existing callers.

    Raises
    ------
    AssertionError
        If any expected attribute is not close to the computed one.
    """
    # Context managers close the files even when unpickling raises.
    with open(infile, "br") as ifile:
        Din = pickle.load(ifile)

    Y = Din["Y"]
    M = Din["M"]

    # assign slm params
    slm = SLM(M, FixedEffect(1))

    # When both keys are present the lattice replaces the triangles.
    if "tri" in Din:
        slm.surf = {"tri": Din["tri"]}
    if "lat" in Din:
        slm.surf = {"lat": Din["lat"]}

    # here we go --> run the linear model
    slm.linear_model(Y)

    with open(expfile, "br") as ofile:
        Dout = pickle.load(ofile)

    # compare...
    testout = [
        np.allclose(getattr(slm, makey_), expected, rtol=1e-05, equal_nan=True)
        for makey_, expected in Dout.items()
    ]
    assert all(testout)
def get_linmod_output(Y, M, foutname, tri=None, lat=None):
    """Runs linmod, pickles all relevant output and returns it.

    Parameters
    ----------
    Y : array-like
        Data passed to ``SLM.linear_model``.
    M : model
        Model passed as the first argument of ``SLM``.
    foutname : path-like
        File the output dict is pickled to.
    tri : optional
        Triangles, supplied to the SLM as ``surf = {"tri": tri}``.
    lat : optional
        Lattice, supplied to the SLM as ``surf = {"lat": lat}``.

    Returns
    -------
    dict
        The listed SLM attributes that are not None after fitting.
    """
    slm = SLM(M, FixedEffect(1))

    if tri is not None:
        slm.surf = {"tri": tri}
    if lat is not None:
        # The lattice goes into ``surf`` under the "lat" key, the same way the
        # triangles are supplied above. It used to be assigned to ``slm.lat``
        # as a dict, which did not match the "tri" handling.
        slm.surf = {"lat": lat}

    slm.linear_model(Y)

    keys = [
        "cluster_threshold",
        "coef",
        "df",
        "drlim",
        "niter",
        "resl",
        "SSE",
        "thetalim",
        "X",
        "tri",
    ]

    D = {}
    for key in keys:
        value = getattr(slm, key)
        if value is not None:
            D[key] = value

    with open(foutname, "wb") as handle:
        pickle.dump(D, handle, protocol=4)

    return D
def array2effect(A, n_random=0):
    """Converts an input array to a set of effects.

    Parameters
    ----------
    A : np.array
        A samples-by-effects array.
    n_random : int, optional
        Number of random effects, by default 0. The first ``n_random`` columns
        of A become the random effects, the remaining columns the fixed ones.

    Returns
    -------
    brainstat.stats.terms.FixedEffect, brainstat.stats.terms.MixedEffect
        The fixed effects alone when ``n_random`` is 0, otherwise the sum of
        the fixed and mixed effects.
    """
    fixed = FixedEffect(A[:, n_random:])
    if n_random == 0:
        return fixed

    # Random effects are named "f0", "f1", ...
    random_names = ["f" + str(idx) for idx in range(n_random)]
    mixed = MixedEffect(A[:, :n_random], name_ran=random_names)
    return fixed + mixed
def get_fdr_output(D, foutname):
    """Runs fdr, pickles the output and returns it.

    Parameters
    ----------
    D : dict
        Attribute names and values to set on the SLM before ``fdr`` runs.
    foutname : path-like
        File the output dict is pickled to.

    Returns
    -------
    dict
        Dict with the single key "Q" holding the value returned by ``fdr``.
        Nothing used to be returned, despite the docstring promising output.
    """
    slm = SLM(FixedEffect(1), FixedEffect(1))
    for key, value in D.items():
        setattr(slm, key, value)

    # run fdr
    Q = fdr(slm)

    Q_out = {"Q": Q}

    with open(foutname, "wb") as handle:
        pickle.dump(Q_out, handle, protocol=4)

    return Q_out
def test_fixed_init():
    """Checks FixedEffect construction from an array, a DataFrame and a scalar."""
    column = np.random.random_sample((10, 1))

    # Array input, with and without the intercept column.
    with_intercept = FixedEffect(column, ["x0"])
    without_intercept = FixedEffect(column, ["x0"], add_intercept=False)

    assert np.array_equal(with_intercept.m.shape, [10, 2])
    assert np.array_equal(with_intercept.names, ["intercept", "x0"])

    assert np.array_equal(without_intercept.m.shape, [10, 1])
    assert np.array_equal(without_intercept.names, ["x0"])

    # Categorical input yields one indicator attribute per category.
    sex_frame = pd.DataFrame({"Sex": ["M", "M", "M", "F", "F"]})
    categorical = FixedEffect(sex_frame)

    assert np.array_equal(categorical.Sex_M, [1, 1, 1, 0, 0])
    assert np.array_equal(categorical.Sex_F, [0, 0, 0, 1, 1])

    # Scalar input yields only the intercept.
    scalar = FixedEffect(1)
    assert np.array_equal(scalar.intercept, [1])
def dummy_test(infile, expfile):
    """Run ``SLM._linear_model`` on pickled inputs and compare with the expectation.

    Parameters
    ----------
    infile : path-like
        Pickle file holding a dict with the data "Y", the design matrix "M",
        the number of random-effect columns "n_random" and the surface "surf".
    expfile : path-like
        Pickle file holding a dict that maps attribute names to the values
        expected on the SLM after fitting. A "surf" entry is ignored.

    Raises
    ------
    AssertionError
        If any expected attribute is not close to the computed one.
    """
    # Context managers close the files even when unpickling raises.
    with open(infile, "br") as ifile:
        Din = pickle.load(ifile)

    Y = Din["Y"]
    M = Din["M"]
    n_random = Din["n_random"]

    # Convert M to a true BrainStat model: the first n_random columns are the
    # random effects, the remaining columns the fixed effects.
    fixed_effects = FixedEffect(M[:, n_random:])
    if n_random != 0:
        mixed_effects = MixedEffect(
            M[:, :n_random],
            name_ran=["f" + str(x) for x in range(n_random)],
        )
        M = fixed_effects + mixed_effects
    else:
        M = fixed_effects

    # assign slm params
    slm = SLM(M, FixedEffect(1), surf=Din["surf"])

    # here we go --> run the linear model
    slm._linear_model(Y)

    with open(expfile, "br") as ofile:
        Dout = pickle.load(ofile)

    # compare...
    testout = []
    for k, v in Dout.items():
        if k == "surf":
            # Surface data is only stored for reconstruction in MATLAB.
            continue
        testout.append(np.allclose(getattr(slm, k), v, rtol=1e-05, equal_nan=True))

    assert all(testout)
def dummy_test(infile, expfile):
    """Run ``fdr`` on pickled inputs and compare with the pickled expectation.

    Parameters
    ----------
    infile : path-like
        Pickle file holding a dict of attributes that are set on the SLM.
    expfile : path-like
        Pickle file holding a dict whose "Q" entry is the expected result.

    Raises
    ------
    AssertionError
        If the computed Q-values are not close to the expected ones.
    """
    # Context managers close the files even when unpickling raises.
    with open(infile, "br") as ifile:
        idic = pickle.load(ifile)

    slm = SLM(FixedEffect(1), FixedEffect(1))
    for key, value in idic.items():
        setattr(slm, key, value)

    # run fdr
    Q = fdr(slm)

    # load expected output data
    with open(expfile, "br") as efile:
        expdic = pickle.load(efile)

    assert np.allclose(Q, expdic["Q"])
def test_volumetric_input():
    """Fits an SLM on random data with a volumetric mask image passed as surf."""
    n_subjects = 3

    mask_path = tflow.get("MNI152Lin", resolution="02", desc="brain", suffix="mask")
    mask_image = nib.load(mask_path)

    # One data column per non-zero voxel of the mask.
    n_voxels = (mask_image.get_fdata() != 0).sum()
    data = np.random.rand(n_subjects, n_voxels)

    slm = SLM(FixedEffect(1), np.ones(3), surf=mask_image)
    slm.fit(data)
def test_fixed_overload():
    """Checks the +, - and * operator overloads of the FixedEffect class."""
    data = np.random.random_sample((10, 3))

    cols_01 = FixedEffect(data[:, :2], ["x0", "x1"], add_intercept=False)
    cols_12 = FixedEffect(data[:, 1:], ["x2", "x3"], add_intercept=False)
    col_2 = FixedEffect(data[:, 2], ["x2"], add_intercept=False)
    col_0_intercept = FixedEffect(data[:, 0], ["x0"], add_intercept=True)
    col_1_intercept = FixedEffect(data[:, 1], ["x1"], add_intercept=True)

    # Addition of overlapping terms reproduces the full data matrix.
    summed = cols_01 + cols_12
    assert np.array_equal(summed.m, data)

    # Adding a scalar 1 matches constructing the term with an intercept.
    scalar_plus_term = 1 + FixedEffect(data[:, 0])
    assert np.array_equal(col_0_intercept.m, scalar_plus_term.m)

    # Two terms that both carry an intercept end up with a single one.
    both_intercepts = col_0_intercept + col_1_intercept
    expected_sum = np.concatenate((np.ones((10, 1)), data[:, 0:2]), axis=1)
    assert np.array_equal(both_intercepts.m, expected_sum)

    # Subtraction leaves only the first column.
    difference = cols_01 - cols_12
    assert np.array_equal(difference.m, data[:, 0][:, None])

    # Multiplication equals the column-wise product.
    product = cols_01 * col_2
    expected_product = data[:, :2] * data[:, 2][:, None]
    assert np.array_equal(product.m, expected_product)
def generate_random_slm(rand_dict):
    """Generates the input and expected output dicts for ``compute_resels``.

    Parameters
    ----------
    rand_dict : dict
        Attribute names and values to set on the SLM that is passed to
        ``compute_resels``.

    Returns
    -------
    I : dict
        Copy of the entries of ``rand_dict`` (the input).
    O : dict
        The ``compute_resels`` results under the keys "resels",
        "reselspvert" and "edg" (the output).
    """
    rand_slm = SLM(FixedEffect(1), FixedEffect(1))

    # this is going to be the input slm
    I = {}
    for name, value in rand_dict.items():
        setattr(rand_slm, name, value)
        I[name] = value

    # this is going to be the output dict
    resels, reselspvert, edg = compute_resels(rand_slm)
    O = {"resels": resels, "reselspvert": reselspvert, "edg": edg}

    return I, O
def generate_slm(**kwargs):
    """Generates a SLM with the given attributes.

    Parameters
    ----------
    **kwargs
        Each keyword argument is set as an attribute of the same name on the
        SLM.

    Returns
    -------
    brainstat.stats.SLM.SLM
        SLM object.
    """
    model = SLM(FixedEffect(1), 1)
    for attribute in kwargs:
        setattr(model, attribute, kwargs[attribute])
    return model
def copy_slm(slm):
    """Copies an SLM object attribute by attribute.

    Parameters
    ----------
    slm : brainstat.stats.SLM.SLM
        SLM object to copy.

    Returns
    -------
    brainstat.stats.SLM.SLM
        New SLM object carrying every attribute found in ``slm.__dict__``.
        The attribute values themselves are not copied.
    """
    duplicate = SLM(FixedEffect(1), 1)
    for attribute in slm.__dict__.keys():
        value = getattr(slm, attribute)
        setattr(duplicate, attribute, value)
    return duplicate
def generate_random_test_data(
    Y_dim,
    M_dim,
    finname,
    seed=0,
    triD=None,
    latD=None,
    M_term=False,
    add_intercept=True,
):
    """Generate a random test dataset and pickle it.

    Parameters
    ----------
    Y_dim : tuple
        Shape of the random data array Y.
    M_dim : tuple
        Shape of the random model array M, before the intercept is added.
    finname : path-like
        File the dataset dict is pickled to (a name ending in *pkl).
    seed : int, optional
        Seed for numpy's global random state, by default 0.
    triD : dict, optional
        Keys "tri_min", "tri_max" and "tri_dim" for drawing random triangles.
    latD : dict, optional
        Keys "lat_min", "lat_max" and "lat_dim" for drawing a random lattice.
    M_term : bool, optional
        Wrap M in a FixedEffect, by default False.
    add_intercept : bool, optional
        Prepend a column of ones to M, by default True.

    Returns
    -------
    tuple
        ``(Y, M, tri)`` when triD is given, else ``(Y, M, lat)`` when latD is
        given, else ``(Y, M)``.
    """
    np.random.seed(seed=seed)

    # The draw order (Y, M, tri, lat) fixes the values for a given seed.
    Y = np.random.random_sample(Y_dim)
    M = np.random.random_sample(M_dim)

    if add_intercept:
        ones_column = np.ones((M_dim[0], 1))
        M = np.concatenate((ones_column, M), axis=1)
    if M_term:
        M = FixedEffect(M)

    has_tri = triD is not None
    has_lat = latD is not None

    D = {"Y": Y, "M": M}
    if has_tri:
        tri = np.random.randint(
            triD["tri_min"], triD["tri_max"], size=triD["tri_dim"]
        )
        D["tri"] = tri
    if has_lat:
        lat = np.random.randint(
            latD["lat_min"], latD["lat_max"], size=latD["lat_dim"]
        )
        D["lat"] = lat

    with open(finname, "wb") as handle:
        pickle.dump(D, handle, protocol=4)

    # Triangles take precedence over the lattice in the return value.
    if has_tri:
        return Y, M, tri
    if has_lat:
        return Y, M, lat
    return Y, M
def generate_test_data():
    """Generate the "xlinmod" input/output files for every parameter combination.

    For each entry of the parameter grid, random data and a random model are
    generated and saved as input, an SLM is fitted with ``linear_model``, and
    the SLM is written out with ``slm2files``. Test numbers start at 1.
    """
    np.random.seed(0)
    surface = _generate_sphere()
    n_vertices = np.array(get_points(surface)).shape[0]

    # First grid: fixed effects only; second grid: one random effect.
    parameters = [
        {
            "n_observations": [103],
            "n_vertices": [n_vertices],
            "n_variates": [1, 2, 3],
            "n_predictors": [1, 7],
            "n_random": [0],
            "surf": [None, surface],
        },
        {
            "n_observations": [103],
            "n_vertices": [n_vertices],
            "n_variates": [1],
            "n_predictors": [2, 7],
            "n_random": [1],
            "surf": [None, surface],
        },
    ]

    for test_num, params in enumerate(ParameterGrid(parameters), start=1):
        Y, M = generate_random_data_model(
            params["n_observations"],
            params["n_vertices"],
            params["n_variates"],
            params["n_predictors"],
        )

        input_dict = {
            "Y": Y,
            "M": M,
            "surf": params["surf"],
            "n_random": params["n_random"],
        }
        save_input_dict(input_dict, "xlinmod", test_num)

        model = array2effect(M, params["n_random"])
        slm = SLM(model, FixedEffect(1), params["surf"])
        slm.linear_model(Y)
        slm2files(slm, "xlinmod", test_num)
))

###################################################################
# Next, we will assess whether a subject's age is related to their cortical
# thickness. To this end we can create a linear model with BrainStat. For our
# first model, we will only consider the effect of age, i.e. we will disregard
# the effect of sex and the fact that some subjects visit twice. First we
# declare the age variable as a FixedEffect. The FixedEffect class can be
# created in two ways: either we provide the data with pandas, as we do here,
# or we provide a numpy array and a name for the fixed effect. Let's set up the
# model Y = intercept + B1 * age. Note that BrainStat includes an intercept by
# default.

from brainstat.stats.terms import FixedEffect

term_age = FixedEffect(demographics.AGE_AT_SCAN)
model = term_age

###################################################################
# As said before, if your data is not in a pandas DataFrame (e.g. numpy), you'll
# have to provide the name of the effect as an additional parameter as follows:
term_age_2 = FixedEffect(demographics.AGE_AT_SCAN.to_numpy(), "AGE_AT_SCAN")

###################################################################
# Let's have a look at one of these models. As you can see below, the model is
# stored in a format closely resembling a pandas DataFrame. Note that an
# intercept is automatically added to the model. This behavior can be disabled
# in the FixedEffect call, but we recommend leaving it enabled. We can also
# access the vectors related to each effect by their name, i.e. model.intercept
# and model.AGE_AT_SCAN will return the vectors of the intercept and age,
# respectively.
BrainStat. The context decoding module consists of three parts: genetic
decoding, meta-analytic decoding and histological comparisons. Before we start,
let's run a linear model testing for the effects of age on cortical thickness as
we did in Tutorial 1. We'll use the results of this model later in this
tutorial.
"""

from brainstat.datasets import fetch_mask, fetch_template_surface
from brainstat.stats.SLM import SLM
from brainstat.stats.terms import FixedEffect, MixedEffect
from brainstat.tutorial.utils import fetch_mics_data

thickness, demographics = fetch_mics_data()
mask = fetch_mask("fsaverage5")

term_age = FixedEffect(demographics.AGE_AT_SCAN)
term_sex = FixedEffect(demographics.SEX)
term_subject = MixedEffect(demographics.SUB_ID)
model = term_age + term_sex + term_age * term_sex + term_subject

contrast_age = -model.mean.AGE_AT_SCAN
slm = SLM(
    model,
    contrast_age,
    surf="fsaverage5",
    mask=mask,
    correction=["fdr", "rft"],
    two_tailed=False,
    cluster_threshold=0.01,
)
slm.fit(thickness)
def f_test(slm1: SLM, slm2: SLM) -> SLM:
    """F-statistics for comparing two uni- or multi-variate fixed effects models.

    Parameters
    ----------
    slm1 : brainstat.stats.SLM.SLM
        Standard linear model returned by the t_test function.
    slm2 : brainstat.stats.SLM.SLM
        Standard linear model returned by the t_test function.

    Returns
    -------
    brainstat.stats.SLM.SLM
        Standard linear model with f-test results included. It is a copy of
        the attributes of the model with the smaller ``df``, with ``df``, ``k``
        and ``t`` (and possibly a squeezed ``coef``) replaced.

    Notes
    -----
    ``None`` is returned, after printing a message, when the models are not
    nested or when more than three variates are present. A warning is issued
    when either model has a non-None ``r`` attribute (mixed effects), but the
    computation proceeds regardless.
    """
    if slm1.r is not None or slm2.r is not None:
        warnings.warn("Mixed effects models not programmed yet.")

    slm = SLM(FixedEffect(1), FixedEffect(1))
    # Index 1 denotes the model with the larger df, index 2 the other one.
    # The result starts out as a copy of the attributes of model 2.
    if slm1.df > slm2.df:
        X1 = slm1.X
        X2 = slm2.X
        df1 = slm1.df
        df2 = slm2.df
        SSE1 = slm1.SSE
        SSE2 = slm2.SSE
        for key in slm2.__dict__:
            setattr(slm, key, getattr(slm2, key))
    else:
        X1 = slm2.X
        X2 = slm1.X
        df1 = slm2.df
        df2 = slm1.df
        SSE1 = slm2.SSE
        SSE2 = slm1.SSE
        for key in slm1.__dict__:
            setattr(slm, key, getattr(slm1, key))

    # Residual of X1 after projecting it onto the column space of X2; the
    # models count as nested when its relative squared norm is negligible.
    r = X1 - np.dot(np.dot(X2, np.linalg.pinv(X2)), X1)
    d = np.sum(r.flatten() ** 2) / np.sum(X1.flatten() ** 2)

    if d > np.spacing(1):
        print("Models are not nested.")
        return

    slm.df = np.array([[df1 - df2, df2]])
    h = SSE1 - SSE2

    # if slm['coef'] is 3D and third dimension is 1, then squeeze it to 2D
    if np.ndim(slm.coef) == 3 and np.shape(slm.coef)[2] == 1:
        x1, x2, _ = np.shape(slm.coef)
        slm.coef = slm.coef.reshape(x1, x2)

    if np.ndim(slm.coef) == 2:
        slm.k = np.array(1)
        # (SSE2 <= 0) avoids division by zero; (SSE2 > 0) zeroes those entries.
        slm.t = np.dot(h / (SSE2 + (SSE2 <= 0)) * (SSE2 > 0), df2 / (df1 - df2))
    elif np.ndim(slm.coef) > 2:
        k2, v = np.shape(SSE2)
        # Recover k from k2 = k * (k + 1) / 2 rows of SSE2.
        k = np.around((np.sqrt(1 + 8 * k2) - 1) / 2)
        slm.k = np.array(k)
        if k > 3:
            print("Roy's max root for k>3 not programmed yet.")
            return

        l = min(k, df1 - df2)
        slm.t = np.zeros((int(l), int(v)))

        if k == 2:
            det = SSE2[0, :] * SSE2[2, :] - SSE2[1, :] ** 2
            a11 = SSE2[2, :] * h[0, :] - SSE2[1, :] * h[1, :]
            a21 = SSE2[0, :] * h[1, :] - SSE2[1, :] * h[0, :]
            a12 = SSE2[2, :] * h[1, :] - SSE2[1, :] * h[2, :]
            a22 = SSE2[0, :] * h[2, :] - SSE2[1, :] * h[1, :]
            # Roots of the 2x2 characteristic polynomial: a1 +/- sqrt(a1^2 - a0).
            a0 = a11 * a22 - a12 * a21
            a1 = (a11 + a22) / 2
            s1 = np.array([sqrt(x) for x in (a1**2 - a0)]).real
            d = (df2 / (df1 - df2)) / (det + (det <= 0)) * (det > 0)
            slm.t[0, :] = (a1 + s1) * d
            if l == 2:
                slm.t[1, :] = (a1 - s1) * d
        if k == 3:
            det = (
                SSE2[0, :] * (SSE2[2, :] * SSE2[5, :] - SSE2[4, :] ** 2)
                - SSE2[5, :] * SSE2[1, :] ** 2
                + SSE2[3, :] * (SSE2[1, :] * SSE2[4, :] * 2 - SSE2[2, :] * SSE2[3, :])
            )
            m1 = SSE2[2, :] * SSE2[5, :] - SSE2[4, :] ** 2
            m3 = SSE2[0, :] * SSE2[5, :] - SSE2[3, :] ** 2
            m6 = SSE2[0, :] * SSE2[2, :] - SSE2[1, :] ** 2
            m2 = SSE2[3, :] * SSE2[4, :] - SSE2[1, :] * SSE2[5, :]
            m4 = SSE2[1, :] * SSE2[4, :] - SSE2[2, :] * SSE2[3, :]
            m5 = SSE2[1, :] * SSE2[3, :] - SSE2[0, :] * SSE2[4, :]
            a11 = m1 * h[0, :] + m2 * h[1, :] + m4 * h[3, :]
            a12 = m1 * h[1, :] + m2 * h[2, :] + m4 * h[4, :]
            a13 = m1 * h[3, :] + m2 * h[4, :] + m4 * h[5, :]
            a21 = m2 * h[0, :] + m3 * h[1, :] + m5 * h[3, :]
            a22 = m2 * h[1, :] + m3 * h[2, :] + m5 * h[4, :]
            a23 = m2 * h[3, :] + m3 * h[4, :] + m5 * h[5, :]
            a31 = m4 * h[0, :] + m5 * h[1, :] + m6 * h[3, :]
            a32 = m4 * h[1, :] + m5 * h[2, :] + m6 * h[4, :]
            a33 = m4 * h[3, :] + m5 * h[4, :] + m6 * h[5, :]
            # Coefficients of the cubic x^3 + a2 x^2 + a1 x + a0.
            a0 = (
                -a11 * (a22 * a33 - a23 * a32)
                + a12 * (a21 * a33 - a23 * a31)
                - a13 * (a21 * a32 - a22 * a31)
            )
            a1 = a22 * a33 - a23 * a32 + a11 * a33 - a13 * a31 + a11 * a22 - a12 * a21
            a2 = -(a11 + a22 + a33)
            q = a1 / 3 - a2**2 / 9
            r = (a1 * a2 - 3 * a0) / 6 - a2**3 / 27
            s1 = (r + [sqrt(x) for x in (q**3 + r**2)]) ** (1 / 3)
            z = np.zeros((3, v))
            z[0, :] = 2 * s1.real - a2 / 3
            z[1, :] = -s1.real - a2 / 3 + np.sqrt(3) * s1.imag
            z[2, :] = -s1.real - a2 / 3 - np.sqrt(3) * s1.imag

            # Order the roots from largest to smallest per column.
            if not np.count_nonzero(z) == 0:
                z.sort(axis=0)
                z = z[::-1]
            d = df2 / (df1 - df2) / (det + (det <= 0)) * (det > 0)

            # l may be the numpy float k, which range() rejects; cast it the
            # same way the allocation of slm.t above does.
            for j in range(int(l)):
                slm.t[j, :] = z[j, :] * d
    return slm
def dummy_test(infile, expfile):
    """Fit an SLM from pickled inputs and compare it with the pickled expectation.

    Parameters
    ----------
    infile : path-like
        Pickle file holding a dict of SLM attributes plus the data "Y". The
        "model" entry is stored as a matrix and converted back to effects.
    expfile : path-like
        Pickle file holding a dict that maps attribute names to the values
        expected on the fitted SLM. The keys "model", "correction", "_tri" and
        "surf" are not compared.

    Raises
    ------
    AssertionError
        If any compared attribute differs from its expected value.
    """
    # Context managers close the files even when unpickling raises.
    with open(infile, "br") as ifile:
        idic = pickle.load(ifile)

    slm = SLM(FixedEffect(1), FixedEffect(1))
    # Data are saved a little differently from the actual input due to compatibility with MATLAB.
    # Data wrangle a bit to bring it back into the Python input format.
    for key in idic.keys():
        if key == "Y":
            # Y is input for slm.fit(), not a property.
            continue
        if key == "model":
            # Model is saved as a matrix rather than a Fixed/MixedEffect
            if idic[key].shape[1] == 1:
                idic[key] = FixedEffect(1) + FixedEffect(idic[key])
            else:
                idic[key] = (
                    FixedEffect(1)
                    + FixedEffect(idic[key][:, 0])
                    + MixedEffect(idic[key][:, 1])
                    + MixedEffect(1)
                )
        setattr(slm, key, idic[key])
        if key == "surf" and slm.surf is not None:
            # NOTE(review): presumably converts stored 0-based triangle
            # indices to 1-based ones - confirm.
            slm.surf["tri"] += 1

    slm.fit(idic["Y"])

    # load expected output data
    with open(expfile, "br") as efile:
        out = pickle.load(efile)

    # Format of self.P changed since files were created -- alter out to match some changes.
    # Combine the list outputs, sort with pandas, and return to list.
    if "P" in out:
        out["P"]["pval"]["C"] = _onetailed_to_twotailed(
            out["P"]["pval"]["C"][0], out["P"]["pval"]["C"][1]
        )
        for key1 in ["peak", "clus"]:
            P_tmp = []
            none_squeeze = lambda x: np.squeeze(x) if x is not None else None
            for i in range(len(out["P"][key1]["P"])):
                tail_dict = {
                    key: none_squeeze(value[i])
                    for key, value in out["P"][key1].items()
                }
                if tail_dict["P"] is not None:
                    if tail_dict["P"].size == 1:
                        P_tmp.append(pd.DataFrame.from_dict([tail_dict]))
                    else:
                        P_tmp.append(pd.DataFrame.from_dict(tail_dict))
                    # NOTE(review): sort_values returns a new frame and the
                    # result is discarded, so this line does not sort P_tmp[i].
                    # Left unchanged because making the sort effective would
                    # alter what is compared below - confirm the intent.
                    P_tmp[i].sort_values(by="P", ascending=True)
                else:
                    P_tmp.append(pd.DataFrame(columns=tail_dict.keys()))
            out["P"][key1] = P_tmp

    testout = []
    skip_keys = ["model", "correction", "_tri", "surf"]
    for key in out.keys():
        if key in skip_keys:
            continue
        if key == "P":
            testout.append(recursive_comparison(out[key], getattr(slm, key)))
        elif out[key] is not None:
            comp = np.allclose(out[key], getattr(slm, key), rtol=1e-05, equal_nan=True)
            testout.append(comp)

    assert all(flag == True for (flag) in testout)
def generate_data_test_fdr():
    """Generate the input/output pickle pairs used by the fdr tests.

    Tests 01-13 use random data: for test NN the input is written by
    ``generate_random_fdr_data`` to ``xstatq_NN_IN.pkl`` and the output by
    ``get_fdr_output`` to ``xstatq_NN_OUT.pkl``. Test 14 uses the real
    ``thickness_n10.pkl`` data, on which a linear model and a t-test are run
    before fdr.
    """
    # Dimensions shared by tests 10-13.
    tri_dim = (40960, 3)
    resl_dim = (61440, 1)
    c_dim = (1, 2)
    ef_dim = (1, 20484)
    sd_dim = (1, 20484)
    X_dim = (10, 2)
    coef_dim = (2, 20484)
    SSE_dim = (1, 20484)

    # One entry per random test, in test-number order:
    # (t_dim, df_max, keyword arguments for generate_random_fdr_data).
    random_cases = [
        # 01: random data with a shape matching a real data set
        ((1, 64984), 64984, dict(seed=444)),
        # 02: random data
        ((1, 9850), 1000, dict(seed=445)),
        # 03: like 02, with k
        ((1, 2139), 2000, dict(k=3, seed=446)),
        # 04: like 02, plus optional mask
        ((1, 2475), 1500, dict(mask_dim=2475, seed=447)),
        # 05: like 02, plus optional dfs
        ((1, 1998), 4000, dict(dfs_max=1997, seed=448)),
        # 06: like 02, plus optional dfs and mask
        ((1, 3328), 10000, dict(k=2, dfs_max=3328, mask_dim=3328, seed=449)),
        # 07: like 02, plus optional dfs, mask and tri
        (
            (1, 9512),
            5000,
            dict(dfs_max=9511, mask_dim=9512, tri_dim=(1724, 3), seed=450),
        ),
        # 08: like 02, plus optional dfs, tri and resl
        (
            (1, 1520),
            5000,
            dict(k=5, dfs_max=9, tri_dim=(4948, 3), resl_dim=(1520, 1), seed=451),
        ),
        # 09: like 08, with values/shapes of the input params changed
        (
            (1, 14397),
            1,
            dict(dfs_max=2, tri_dim=(2734, 3), resl_dim=(8199, 1), seed=452),
        ),
        # 10: like 08, plus c, ef and sd (non-sense for _fdr)
        (
            (1, 20484),
            10,
            dict(
                tri_dim=tri_dim,
                resl_dim=resl_dim,
                c_dim=c_dim,
                ef_dim=ef_dim,
                sd_dim=sd_dim,
                seed=453,
            ),
        ),
        # 11: like 10, plus X, coef and SSE (non-sense for _fdr)
        (
            (1, 20484),
            10,
            dict(
                tri_dim=tri_dim,
                resl_dim=resl_dim,
                c_dim=c_dim,
                ef_dim=ef_dim,
                sd_dim=sd_dim,
                X_dim=X_dim,
                coef_dim=coef_dim,
                SSE_dim=SSE_dim,
                seed=454,
            ),
        ),
        # 12: like 11, plus optional mask
        (
            (1, 20484),
            10,
            dict(
                tri_dim=tri_dim,
                mask_dim=20484,
                resl_dim=resl_dim,
                c_dim=c_dim,
                ef_dim=ef_dim,
                sd_dim=sd_dim,
                X_dim=X_dim,
                coef_dim=coef_dim,
                SSE_dim=SSE_dim,
                seed=455,
            ),
        ),
        # 13: like 10, plus mask
        # NOTE(review): reuses seed 453 of test 10 - confirm this is intended.
        (
            (1, 20484),
            10,
            dict(
                tri_dim=tri_dim,
                resl_dim=resl_dim,
                c_dim=c_dim,
                mask_dim=20484,
                ef_dim=ef_dim,
                sd_dim=sd_dim,
                seed=453,
            ),
        ),
    ]

    for test_num, (t_dim, df_max, options) in enumerate(random_cases, start=1):
        print("test_fdr: test_%02d data is generated.." % test_num)
        finname = datadir("xstatq_%02d_IN.pkl" % test_num)
        D = generate_random_fdr_data(t_dim, df_max, finname, **options)
        foutname = datadir("xstatq_%02d_OUT.pkl" % test_num)
        get_fdr_output(D, foutname)

    #### test 14, real data
    print("test_fdr: test_14 data is generated..")
    # thickness_n10 data, slm and t_test run prior to fdr
    realdataf = datadir("thickness_n10.pkl")
    # The context manager closes the file even when unpickling raises.
    with open(realdataf, "br") as ifile:
        DD = pickle.load(ifile)

    # run slm
    M = FixedEffect(DD["M"])
    slm = SLM(M, FixedEffect(1))
    slm.linear_model(DD["Y"])

    # run t-test
    t_test(slm)

    D = {"t": slm.t, "df": 10, "k": 1}
    finname = datadir("xstatq_14_IN.pkl")
    with open(finname, "wb") as handle:
        pickle.dump(D, handle, protocol=4)
    foutname = datadir("xstatq_14_OUT.pkl")
    get_fdr_output(D, foutname)
def generate_test_data():
    """Generate the "slm" input/output files for every parameter combination.

    An SLM is built and fitted for each entry of the parameter grid. The
    parameters are then saved as input, with the model replaced by its raw
    matrix, and the fitted SLM is saved as output. Test numbers start at 1.
    """
    pial, mask, age, iq, thickness = load_training_data(n=20)

    fixed_model = FixedEffect(1) + FixedEffect(age, "age")
    mixed_model = (
        FixedEffect(1)
        + FixedEffect(age, "age")
        + MixedEffect(iq, name_ran="iq")
        + MixedEffect(1, name_ran="Identity")
    )

    # Multivariate data: thickness stacked with one or two random variates.
    thickness_3d = thickness[:, :, None]
    one_random = np.random.random_sample(thickness.shape)[:, :, None]
    variates_2 = np.concatenate((thickness_3d, one_random), axis=2)
    two_random = np.random.rand(thickness.shape[0], thickness.shape[1], 2)
    variates_3 = np.concatenate((thickness_3d, two_random), axis=2)

    # Params 1: No surface, fixed effect.
    # Params 2: One-tailed mixed with theta/dr changes.
    # Params 3: With surface. and RFT correction.
    params_fixed = {
        "Y": [thickness, variates_2, variates_3],
        "model": [fixed_model],
        "contrast": [-age],
        "correction": [None, "fdr"],
        "surf": [None],
        "mask": [mask],
        "niter": [1],
        "thetalim": [0.01],
        "drlim": [0.1],
        "two_tailed": [True],
        "cluster_threshold": [0.001],
    }
    params_mixed = {
        "Y": [thickness],
        "model": [mixed_model],
        "contrast": [-age],
        "correction": ["fdr"],
        "surf": [None, pial],
        "mask": [mask],
        "niter": [1],
        "thetalim": [0.01, 0.05],
        "drlim": [0.1, 0.2],
        "two_tailed": [False],
        "cluster_threshold": [0.001],
    }
    params_surface = {
        "Y": [thickness],
        "model": [fixed_model, mixed_model],
        "contrast": [-age],
        "surf": [pial],
        "mask": [mask],
        "correction": [None, ["fdr", "rft"]],
        "niter": [1],
        "thetalim": [0.01],
        "drlim": [0.1],
        "two_tailed": [True],
        "cluster_threshold": [0.001, 1.2],
    }
    parameters = [params_fixed, params_mixed, params_surface]

    for test_num, params in enumerate(ParameterGrid(parameters), start=1):
        slm = SLM(
            params["model"],
            params["contrast"],
            params["surf"],
            params["mask"],
            correction=params["correction"],
            niter=params["niter"],
            thetalim=params["thetalim"],
            drlim=params["drlim"],
            two_tailed=params["two_tailed"],
            cluster_threshold=params["cluster_threshold"],
        )
        slm.fit(params["Y"])

        # Save input/output; the model is stored as its raw matrix.
        age_column = age[:, None]
        if isinstance(params["model"], FixedEffect):
            params["model"] = age_column
        else:
            params["model"] = np.concatenate((age_column, iq[:, None]), axis=1)
        dict2pkl(params, "slm", test_num, input=True)
        slm2files(slm, "slm", test_num, input=False)
thickness = np.zeros((n, 10242))
for i in range(n):
    thickness[i, :] = np.squeeze(nib.load(files[i, 0]).get_fdata())
mask = np.all(thickness != 0, axis=0)

pial_left = read_surface_gz(fetch_surf_fsaverage()["pial_left"])

###################################################################
# Next, we can create a BrainStat linear model by declaring these variables as
# terms. The term class requires two things: 1) an array or scalar, and 2) a
# variable name for each column. Lastly, we can create the model by simply
# adding the terms together.

from brainstat.stats.terms import FixedEffect

term_intercept = FixedEffect(1, names="intercept")
term_age = FixedEffect(age, "age")
term_iq = FixedEffect(iq, "iq")
model = term_intercept + term_age + term_iq

###################################################################
# We can also add interaction effects to the model by multiplying terms.

model_interaction = term_intercept + term_age + term_iq + term_age * term_iq

###################################################################
# Now, let's imagine we have some cortical marker (e.g. cortical thickness) for
# each subject and we want to evaluate whether this marker changes with age
# whilst correcting for effects of IQ and age-IQ interactions.

from brainstat.stats.SLM import SLM