Esempio n. 1
0
def test_iqspr4df_second_col(data, test_df):
    """Check ``IQSPR4DF.unique`` when ``sample_col`` is the list ``['b']``.

    Builds an ``IQSPR4DF`` from the ``'like_mdl'`` and ``'ngram'`` entries of
    the ``data`` fixture, calls ``unique`` on ``test_df`` and compares the
    returned rows and frequencies against hard-coded expectations.

    NOTE(review): the contents of ``test_df`` are defined by a fixture outside
    this view; the expected values below presumably correspond to it.
    """
    # Arrange: sampler restricted to column 'b', plus the expected outcome.
    sampler = IQSPR4DF(
        estimator=data['like_mdl'],
        modifier=data['ngram'],
        r_ESS=0,
        sample_col=['b'],
    )
    expected_rows = pd.DataFrame([[0, 1], [3, 3], [1, 2]], columns=['a', 'b'])
    expected_freq = np.array([1, 3, 1])

    # Act
    unique_rows, freq = sampler.unique(test_df)

    # Assert: element-wise equality, reduced over columns and then over rows.
    assert (unique_rows == expected_rows).all().all()
    assert np.all(freq == expected_freq)
Esempio n. 2
0
def test_iqspr4df_unique1(data):
    """Call an ``IQSPR4DF`` sampler on a small frame and check what it yields.

    The input frame has two columns: the first two values of
    ``data['pg'][0]``, each repeated twice, and the integers 0..3. For every
    item yielded by the sampler the test checks that the third element sums
    to 1 (within 1e-5), the fourth sums to 4, and the first equals a
    hard-coded two-row frame.

    NOTE: it is not certain that this test is fully reliable when only the
    random seed is fixed.
    """
    estimator = data['like_mdl']
    modifier = data['ngram']
    betas = np.linspace(0.1, 1, 1)  # a single value: 0.1

    repeated_head = data['pg'][0][:2].values.repeat(2)
    samples = pd.DataFrame([repeated_head, [0, 1, 2, 3]]).T

    # Seed NumPy's global RNG before the sampler is built and run.
    np.random.seed(0)
    sampler = IQSPR4DF(estimator=estimator, modifier=modifier, r_ESS=0, sample_col=0)

    expected = pd.DataFrame(
        [['C([*])C([*])(SCCC)', 'C([*])C([*])(C(=O)OCCSCCC#N)'], [0, 2]]
    ).T

    for yielded, _ll, prob, freq in sampler(samples, betas, yield_lpf=True):
        prob_total = np.sum(prob)
        freq_total = np.sum(freq)
        assert np.abs(prob_total - 1.0) < 1e-5
        assert freq_total == 4
        assert (yielded == expected).all().all()