def test_svgp_model(self):
    """Fit the GP model with ``method='approx'`` on a small two-site sample.

    The fit must add a ``GP_pred`` column, and the mean of ``GP_residuals``
    must lie within 0.5 of zero.
    """
    frame = generate_data(sample_size=4, n_sites=2, randseed=3)
    model = pynm.PyNM(frame)
    model.gp_normative_model(method='approx')

    assert 'GP_pred' in model.data.columns
    mean_residual = model.data['GP_residuals'].mean()
    assert math.isclose(0, mean_residual, abs_tol=0.5)
def test_get_masks_all_CON(self):
    """Check the masks returned by ``_get_masks`` for the ``randseed=1`` data.

    Applied to 12 positions, the first mask must keep all of them and the
    second mask must keep none.
    """
    model = pynm.PyNM(generate_data(randseed=1))
    ctr_mask, prob_mask = model._get_masks()

    positions = np.arange(12)
    assert positions[ctr_mask].shape[0] == 12
    assert positions[prob_mask].shape[0] == 0
def test_het_res(self):
    """The exact GP fit on ``dataset_het()`` must emit exactly one warning.

    The single recorded warning has to carry the heteroskedasticity message.
    """
    expected_message = "The residuals are heteroskedastic!"
    model = pynm.PyNM(
        dataset_het(),
        score='score',
        conf='x',
        confounds=['x'],
        group='train_sample',
    )

    with pytest.warns(Warning) as caught:
        model.gp_normative_model(method='exact')

    assert len(caught) == 1
    assert caught[0].message.args[0] == expected_message
def test_get_masks(self):
    """Check the masks returned by ``_get_masks`` for the ``randseed=3`` data.

    Applied to 6 positions, the first mask must keep five of them and the
    first position kept by the second mask must be index 3.
    """
    model = pynm.PyNM(generate_data(randseed=3))
    ctr_mask, prob_mask = model._get_masks()

    positions = np.arange(6)
    assert positions[ctr_mask].shape[0] == 5
    assert positions[prob_mask][0] == 3
def test_use_approx_exact(self):
    """``_use_approx(method='exact')`` on 2000 samples warns but stays exact.

    Exactly one warning with the expected text must be recorded, and the
    returned flag must compare equal to ``False``.
    """
    expected_message = "Exact GP model with over 2000 data points requires large amounts of time and memory, continuing with exact model."
    model = pynm.PyNM(generate_data(randseed=3, sample_size=2000))

    with pytest.warns(Warning) as caught:
        decision = model._use_approx(method='exact')

    assert len(caught) == 1
    assert caught[0].message.args[0] == expected_message
    assert decision == False
def test_get_conf_mat(self):
    """``_get_conf_mat`` must return a 6 x 3 matrix without string entries.

    Only the first row is inspected for string values.
    """
    model = pynm.PyNM(generate_data(randseed=3))
    matrix = model._get_conf_mat()

    assert matrix.shape[0] == 6
    assert matrix.shape[1] == 3
    # None of the three entries in the first row may be a str.
    for column in range(3):
        entry = matrix[0, column]
        assert not isinstance(entry, str)
def test_svgp_init(self):
    """Constructing an ``SVGP`` must record 5 training and 6 test points."""
    from pynm.approx import SVGP

    model = pynm.PyNM(generate_data(randseed=3))
    confound_matrix = model._get_conf_mat()
    ctr_mask, _prob_mask = model._get_masks()
    scores = model._get_score()

    svgp = SVGP(confound_matrix, scores, ctr_mask)

    assert svgp.n_train == 5
    assert svgp.n_test == 6
def test_svgp_train(self):
    """Training an ``SVGP`` for two epochs must leave two entries in ``loss``."""
    from pynm.approx import SVGP

    model = pynm.PyNM(generate_data(randseed=3))
    confound_matrix = model._get_conf_mat()
    ctr_mask, _prob_mask = model._get_masks()
    scores = model._get_score()

    svgp = SVGP(confound_matrix, scores, ctr_mask)
    svgp.train(num_epochs=2)

    assert len(svgp.loss) == 2
def test_svgp_predict(self):
    """After two training epochs, ``predict`` must return six means and sigmas."""
    from pynm.approx import SVGP

    model = pynm.PyNM(generate_data(randseed=3))
    confound_matrix = model._get_conf_mat()
    ctr_mask, _prob_mask = model._get_masks()
    scores = model._get_score()

    svgp = SVGP(confound_matrix, scores, ctr_mask)
    svgp.train(num_epochs=2)
    predicted_means, predicted_sigmas = svgp.predict()

    # Both outputs must have length 6 along their first dimension.
    assert predicted_means.size(0) == 6
    assert predicted_sigmas.size(0) == 6
def main():
    """Command-line entry point: fit the normative models and save the result.

    Parses the command-line arguments, reads the phenotype CSV, runs the
    LOESS, centiles and GP normative models, and writes the resulting data
    to the requested output path.
    """
    # Keep the parsed namespace in a local: the GP options and the output
    # path below are read from it as attributes, and it was previously
    # never bound inside this function.
    args = _cli_parser()
    params = vars(args)

    # Confounds arrive as one comma-separated string on the command line.
    confounds = params['confounds'].split(',')
    data = pd.read_csv(params['pheno_p'])

    m = pynm.PyNM(data, params['score'], params['group'], params['conf'],
                  confounds, params['train_sample'])

    # Add a column to data w/ number controls used in this bin
    m.bins_num()

    # Run models
    m.loess_normative_model()
    m.centiles_normative_model()
    m.gp_normative_model(length_scale=args.length_scale,
                         nu=args.nu,
                         method=args.method,
                         batch_size=args.batch_size,
                         n_inducing=args.n_inducing,
                         num_epochs=args.num_epochs)

    m.data.to_csv(args.out_p, index=False)
def test_set_group_names_PROB_CON_all_CON(self):
    """With the ``randseed=1`` data the group labels must be 'CTR' and 'PROB'."""
    model = pynm.PyNM(generate_data(randseed=1))

    assert model.CTR == 'CTR'
    assert model.PROB == 'PROB'
def test_set_group_names_01(self):
    """With ``group='01'`` data the group labels must be 0 and 1."""
    model = pynm.PyNM(generate_data(randseed=3, group='01'))

    assert model.CTR == 0
    assert model.PROB == 1
def test_set_group_manual_zero_col(self):
    """An all-zero ``train_sample`` column with ``train_sample='manual'`` must raise.

    Constructing the model is expected to fail with ``ValueError``.
    """
    frame = generate_data(randseed=3, group='01')
    frame['train_sample'] = 0

    with pytest.raises(ValueError):
        pynm.PyNM(frame, train_sample='manual')
def test_set_group_33(self):
    """``train_sample='0.33'`` must flag one row, and none from group 1.

    The model's group column must become ``'train_sample'``, the flags in
    that column must sum to 1, and no flagged row may have ``group == 1``.
    """
    model = pynm.PyNM(generate_data(randseed=3, group='01'), train_sample='0.33')

    assert model.group == 'train_sample'
    assert model.data['train_sample'].sum() == 1

    in_training = model.data['train_sample'] == 1
    in_group_one = model.data['group'] == 1
    flagged_group_one = model.data[in_training & in_group_one]
    assert flagged_group_one.shape[0] == 0
def test_create_bins(self):
    """Running the centiles model must leave ``bins`` populated (not None)."""
    model = pynm.PyNM(generate_data(randseed=3), bin_spacing=8, bin_width=1.5)
    model.centiles_normative_model()

    assert model.bins is not None
def test_set_group_manual_good_col(self):
    """A hand-made ``train_sample`` column must be accepted in 'manual' mode.

    The model must then use ``'train_sample'`` as its group column and
    report ``PROB`` as 0.
    """
    frame = generate_data(randseed=3, group='01')
    manual_flags = [1, 1, 0, 0, 0, 0]
    frame['train_sample'] = manual_flags

    model = pynm.PyNM(frame, train_sample='manual')

    assert model.PROB == 0
    assert model.group == 'train_sample'
def test_homo_res(self):
    """The exact GP fit on ``dataset_homo()`` must not emit any warning."""
    # pytest.warns(None) was deprecated in pytest 7 and raises in pytest 8,
    # so warnings are recorded with the standard library instead.
    import warnings

    data = dataset_homo()
    m = pynm.PyNM(data, score='score', conf='x', confounds=['x'],
                  group='train_sample')

    with warnings.catch_warnings(record=True) as record:
        # "always" ensures no warning is hidden by an earlier filter or by
        # the once-per-location registry, so an empty record is meaningful.
        warnings.simplefilter("always")
        m.gp_normative_model(method='exact')

    assert len(record) == 0
def test_bins_num(self):
    """``_create_bins`` with spacing 5 and width 10 must produce six bins."""
    model = pynm.PyNM(generate_data(randseed=11), bin_spacing=5, bin_width=10)
    model._create_bins()

    bin_count = len(model.bins)
    assert bin_count == 6
def test_loess_rank(self):
    """After the LOESS model runs, the ``LOESS_rank`` column must sum to 1."""
    model = pynm.PyNM(generate_data(randseed=11), bin_spacing=8, bin_width=1.5)
    model.loess_normative_model()

    rank_total = np.sum(model.data.LOESS_rank)
    assert rank_total == 1
def test_plot(self):
    """``plot`` must return ``None`` once the GP model has been fitted."""
    model = pynm.PyNM(generate_data(randseed=3))
    model.gp_normative_model()

    plot_result = model.plot()
    assert plot_result is None
def test_use_approx_approx(self):
    """``_use_approx(method='approx')`` must compare equal to ``True``."""
    model = pynm.PyNM(generate_data(randseed=3, sample_size=1000))

    decision = model._use_approx(method='approx')
    assert decision == True
def test_use_approx_auto_small(self):
    """``_use_approx(method='auto')`` on the default-size data must equal ``False``."""
    model = pynm.PyNM(generate_data(randseed=3))

    decision = model._use_approx(method='auto')
    assert decision == False
def test_set_group_controls(self):
    """``train_sample='controls'`` must keep ``'group'`` as the group column."""
    frame = generate_data(randseed=3, group='01')
    model = pynm.PyNM(frame, train_sample='controls')

    assert model.group == 'group'
def test_loess_normative_model(self):
    """The ``LOESS_pred`` column must sum to 2.3482 within 1e-5.

    This is a regression value for the ``randseed=11`` data with bin spacing
    8 and bin width 1.5.
    """
    model = pynm.PyNM(generate_data(randseed=11), bin_spacing=8, bin_width=1.5)
    model.loess_normative_model()

    prediction_total = np.sum(model.data.LOESS_pred)
    assert math.isclose(2.3482, prediction_total, abs_tol=0.00001)
def test_centiles_normative_model(self):
    """The ``Centiles`` column must sum to 446.

    This is a regression value for the ``randseed=11`` data with bin spacing
    8 and bin width 1.5.
    """
    model = pynm.PyNM(generate_data(randseed=11), bin_spacing=8, bin_width=1.5)
    model.centiles_normative_model()

    centile_total = np.sum(model.data.Centiles)
    assert centile_total == 446