def check_weights_diff(covs, meth, weights, model, kwargs):
    """Assert that `weights` change the numeric results but not the labels.

    `clustered_model` is called twice with identical arguments, once without
    and once with ``weights=weights``. The two results are then compared
    entry by entry:

    * every 'p' and 'coef' entry must differ between the two runs, unless
      `kwargs` holds a truthy 'bumping' value, in which case that comparison
      is waived;
    * every 'covariate' and 'model' entry must be equal.

    `kwargs` is a plain dict that is expanded into keyword arguments of
    `clustered_model`.
    """
    unweighted = clustered_model(covs, meth, model, **kwargs)
    weighted = clustered_model(covs, meth, model, weights=weights, **kwargs)

    # Numeric columns: expected to move once weights are supplied.
    for key in ('p', 'coef'):
        for idx in range(len(unweighted[key])):
            if not kwargs.get('bumping'):
                plain, with_w = unweighted[key][idx], weighted[key][idx]
                assert plain != with_w, (key, idx, plain, with_w)

    # Label columns: must be untouched by the weights.
    for key in ('covariate', 'model'):
        for idx in range(len(unweighted[key])):
            assert unweighted[key][idx] == weighted[key][idx]
def check_weights_diff(covs, meth, weights, model, kwargs):
    """Assert that `weights` change the numeric results but not the labels.

    `clustered_model` is called twice with identical arguments, once without
    and once with ``weights=weights``. The two results are then compared
    entry by entry:

    * every 'p' and 'coef' entry must differ between the two runs, unless
      `kwargs` holds a truthy 'bumping' value, in which case that comparison
      is waived;
    * every 'covariate' and 'model' entry must be equal.

    `kwargs` is a plain dict that is expanded into keyword arguments of
    `clustered_model`.
    """
    unweighted = clustered_model(covs, meth, model, **kwargs)
    weighted = clustered_model(covs, meth, model, weights=weights, **kwargs)

    # Numeric columns: expected to move once weights are supplied.
    for key in ('p', 'coef'):
        for idx in range(len(unweighted[key])):
            if not kwargs.get('bumping'):
                plain, with_w = unweighted[key][idx], weighted[key][idx]
                assert plain != with_w, (key, idx, plain, with_w)

    # Label columns: must be untouched by the weights.
    for key in ('covariate', 'model'):
        for idx in range(len(unweighted[key])):
            assert unweighted[key][idx] == weighted[key][idx]
def check_weight_m(covs, meth, weights, model, kwargs):
    """Assert that `weights` raise each p-value and do not raise any coef.

    `clustered_model` is called twice with identical arguments, once without
    and once with ``weights=weights``. Both results are written to stderr
    for inspection, then for every entry the weighted 'p' must be strictly
    greater than the unweighted 'p', and the weighted 'coef' must be less
    than or equal to the unweighted 'coef'.

    `kwargs` is a plain dict that is expanded into keyword arguments of
    `clustered_model`.
    """
    import sys

    res = clustered_model(covs, meth, model, **kwargs)
    resw = clustered_model(covs, meth, model, weights=weights, **kwargs)

    # Write the results to stderr explicitly. The previous
    # `print sys.stderr, res` printed the stream object itself to stdout
    # instead of using it as the destination.
    sys.stderr.write("%s\n" % (res,))
    sys.stderr.write("%s\n" % (resw,))

    # Iterate over the entries of the 'p' column (as the sibling checks do)
    # rather than over len(res), which is only the same number when `res`
    # is row-oriented.
    for i in range(len(res['p'])):
        assert resw['p'][i] > res['p'][i], (
            'p', i, resw['p'][i], res['p'][i])
        assert resw['coef'][i] <= res['coef'][i], (
            'coef', i, resw['coef'][i], res['coef'][i])
def check_weight_m(covs, meth, weights, model, kwargs):
    """Assert that `weights` raise each p-value and do not raise any coef.

    `clustered_model` is called twice with identical arguments, once without
    and once with ``weights=weights``. Both results are written to stderr
    for inspection, then for every entry the weighted 'p' must be strictly
    greater than the unweighted 'p', and the weighted 'coef' must be less
    than or equal to the unweighted 'coef'.

    `kwargs` is a plain dict that is expanded into keyword arguments of
    `clustered_model`.
    """
    import sys

    res = clustered_model(covs, meth, model, **kwargs)
    resw = clustered_model(covs, meth, model, weights=weights, **kwargs)

    # Write the results to stderr explicitly. The previous
    # `print sys.stderr, res` printed the stream object itself to stdout
    # instead of using it as the destination.
    sys.stderr.write("%s\n" % (res,))
    sys.stderr.write("%s\n" % (resw,))

    # Iterate over the entries of the 'p' column (as the sibling checks do)
    # rather than over len(res), which is only the same number when `res`
    # is row-oriented.
    for i in range(len(res['p'])):
        assert resw['p'][i] > res['p'][i], (
            'p', i, resw['p'][i], res['p'][i])
        assert resw['coef'][i] <= res['coef'][i], (
            'coef', i, resw['coef'][i], res['coef'][i])
def check_weights1(covs, meth, weights, model, kwargs):
    """Assert that results with and without `weights` agree.

    `clustered_model` is called twice with identical arguments, once without
    and once with ``weights=weights``. Both results must contain the keys
    'p', 'model', 'covariate' and 'coef'. Entries are then compared pairwise:

    * float values must agree to within 1e-4, or both be NaN;
    * any other value must compare equal;
    * when 'bumping' is present in `kwargs`, only 'model' and 'covariate'
      are compared.

    Finally, the first reported model string must contain "|" exactly when
    the requested `model` formula does.

    `kwargs` is a plain dict that is expanded into keyword arguments of
    `clustered_model`.
    """
    res = clustered_model(covs, meth, model, **kwargs)
    resw = clustered_model(covs, meth, model, weights=weights, **kwargs)

    for k in ('p', 'model', 'covariate', 'coef'):
        assert k in res, res
        assert k in resw, resw

        if 'bumping' not in kwargs or k in ('model', 'covariate'):
            for i in range(len(res[k])):
                val, valw = res[k][i], resw[k][i]
                if isinstance(val, float):
                    eq = abs(val - valw) < 1e-4
                else:
                    eq = val == valw

                if not eq:
                    # A NaN on both sides counts as agreement. np.isnan
                    # raises TypeError for non-numeric values (e.g. the
                    # 'model' / 'covariate' strings); treat that as "not
                    # NaN" so a mismatch fails the assertion below with its
                    # diagnostic tuple instead of an unrelated TypeError.
                    try:
                        eq = bool(np.isnan(val) and np.isnan(valw))
                    except TypeError:
                        eq = False

                assert eq, (val, valw, k, i)

    assert ("|" in model) == ("|" in res['model'][0]), (model, res['model'][0])
def check_clustered_model(covs, meth, model, kwargs):
    """Assert that a `clustered_model` result has the expected keys and model.

    Calls ``clustered_model(covs, meth, model, **kwargs)`` and checks that
    the result contains 'p', 'model', 'covariate' and 'coef', and that the
    first reported model string contains "|" exactly when the requested
    `model` formula does.
    """
    res = clustered_model(covs, meth, model, **kwargs)
    # Example of a single result record:
    # {'p': 0.153760092338262, 'model': 'methylation ~ disease', 'covariate':
    #  'diseaseTRUE', 'liptak': True, 'coef': 0.125455808080808}
    required = ('p', 'model', 'covariate', 'coef')
    absent = [key for key in required if key not in res]
    assert not absent, res

    reported = res['model'][0]
    assert ("|" in model) == ("|" in reported), (model, reported)
def test_clustered_model():
    """Generator test for `clustered_model`, with and without an `X` file.

    First yields a `check_clustered` check for a plain call to
    `clustered_model`. Then builds a second matrix from `meth` (scaled by
    1.15 plus seeded uniform noise), writes it to a temporary tab-separated
    file under three different row-name styles ("geneA", "gene-A",
    "gene A", ...), and yields a `check_clustered_df` check for the model
    run with ``X`` set to the quoted path of that file.
    """
    import csv

    # test for 20 samples and 5 CpGs
    covs, meth = _make_data()
    model = "methylation ~ disease + (1|id)"
    r = clustered_model(covs, meth, model)
    yield check_clustered, r, model

    # Fixed seed so the generated matrix is reproducible.
    np.random.seed(42)
    exp = meth.copy() * 1.15 + np.random.random(meth.shape)

    for bad_name in ("", "-", " "):
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            exp.index = ['gene' + bad_name + letter for letter in 'ABCDE']
            # `to_csv` has no `quote` argument; unquoted output is requested
            # through `quoting`. The previous `quote=False` was ignored by
            # old pandas and is rejected with a TypeError by current pandas.
            exp.to_csv(fh.name, sep="\t", quoting=csv.QUOTE_NONE,
                       index=True, index_label="probe")
            fh.flush()
            # The yield stays inside the `with` so the temporary file still
            # exists while the yielded check runs.
            r = clustered_model(covs, meth, model, X="'%s'" % fh.name)
            yield check_clustered_df, r, model, exp