def test_iqspr4df_second_col(data, test_df):
    """Check ``IQSPR4DF.unique`` when ``sample_col`` is the one-item list ``['b']``.

    Builds an ``IQSPR4DF`` from the ``like_mdl`` and ``ngram`` entries of the
    ``data`` fixture, calls ``unique`` on ``test_df`` and compares the two
    returned values against a hard-coded frame and frequency array.
    """
    sampler = IQSPR4DF(
        estimator=data['like_mdl'],
        modifier=data['ngram'],
        r_ESS=0,
        sample_col=['b'],
    )

    # Reference values; presumably derived from the contents of the
    # ``test_df`` fixture, which is defined outside this block.
    expected_rows = pd.DataFrame([[0, 1], [3, 3], [1, 2]], columns=['a', 'b'])
    expected_freq = np.array([1, 3, 1])

    unique_rows, freq = sampler.unique(test_df)

    # Element-wise comparison reduced over both axes of the frame.
    rows_match = unique_rows == expected_rows
    assert rows_match.all().all()

    freq_match = freq == expected_freq
    assert np.all(freq_match)
def test_iqspr4df_unique1(data):
    """Run ``IQSPR4DF`` on a four-row frame with duplicates and check each step.

    The input frame pairs the first two entries of ``data['pg'][0]``, each
    repeated twice, with the values ``0..3``. For every tuple yielded by the
    sampler (called with ``yield_lpf=True``) the test asserts that the third
    item sums to 1 within ``1e-5``, the fourth item sums to 4, and the first
    item equals a hard-coded two-row frame.
    """
    # NOTE: not sure if this test can be fully reliable by only fixing the
    # random seed.
    estimator = data['like_mdl']
    modifier = data['ngram']
    betas = np.linspace(0.1, 1, 1)  # a single value: 0.1

    repeated_first_two = data['pg'][0][:2].values.repeat(2)
    samples = pd.DataFrame([repeated_first_two, [0, 1, 2, 3]]).T

    # Seed immediately before building and running the sampler.
    np.random.seed(0)
    sampler = IQSPR4DF(
        estimator=estimator,
        modifier=modifier,
        r_ESS=0,
        sample_col=0,
    )

    expected = pd.DataFrame(
        [['C([*])C([*])(SCCC)', 'C([*])C([*])(C(=O)OCCSCCC#N)'], [0, 2]]
    ).T

    # Tuple items are presumably (samples, log-likelihood, probabilities,
    # frequencies) -- inferred from the original names; confirm against
    # IQSPR4DF. The second item is not checked here.
    for step_samples, _loglike, prob, freq in sampler(samples, betas, yield_lpf=True):
        prob_error = np.abs(np.sum(prob) - 1.0)
        assert prob_error < 1e-5

        total_freq = np.sum(freq)
        assert total_freq == 4

        frame_match = step_samples == expected
        assert frame_match.all().all()