Esempio n. 1
0
class TestScaleableModelEquivalance(unittest.TestCase):
    """Check that ScaleableParallelConfounded reproduces ParallelConfounded.

    Both models are built from the same parameters in setUp; each test
    compares one piece of their public state (P, V, expected rewards).
    """

    def setUp(self):
        num_vars, num_treated = 5, 2
        q = (.1, .3, .4, .7)
        pZ = .2
        pY = np.asanyarray([[.2, .8], [.3, .9]])
        self.model1 = ParallelConfounded.create(num_vars, num_treated, pZ, pY, q)
        self.model2 = ScaleableParallelConfounded(q, pZ, pY, num_treated,
                                                  num_vars - num_treated)
        self.N1 = num_treated
        self.N2 = num_vars - num_treated

    def test_P(self):
        """P(x) must agree for every parent assignment."""
        for assignment in self.model1.get_parent_assignments():
            np_test.assert_array_almost_equal(
                self.model1.P(assignment),
                self.model2.P(assignment),
                err_msg="x:" + str(assignment))

    def test_V(self):
        """V must agree for random eta vectors (short form expanded first)."""
        for _ in range(10):
            eta_short = self.model2.random_eta_short()
            eta = self.model2.expand(eta_short)

            v_full = self.model1.V(eta)
            v_scaled = self.model2.expand(self.model2.V_short(eta_short))

            np_test.assert_array_almost_equal(v_full, v_scaled,
                                              err_msg="eta:" + str(eta))

    def test_rewards(self):
        """Expected reward summaries must agree between the two models."""
        np_test.assert_almost_equal(self.model1.expected_Y,
                                    self.model2.expected_Y)
        np_test.assert_array_almost_equal(self.model1.expected_rewards,
                                          self.model2.expected_rewards)
Esempio n. 2
0
def regret_vs_m_general(algorithms,
                        N1_vals,
                        N,
                        T,
                        pz,
                        pY,
                        q,
                        epsilon,
                        simulations=1000):
    m_vals = []
    models = []
    regret = np.zeros((len(algorithms), len(N1_vals), simulations))

    for m_indx, N1 in enumerate(N1_vals):
        model = ScaleableParallelConfounded(q,
                                            pz,
                                            pY,
                                            N1,
                                            N - N1,
                                            compute_m=False)
        eta = [0, 0, 1.0 / (N1 + 2.0), 0, 0, 0, 1 - N1 / (N1 + 2.0)]
        model.compute_m(eta_short=eta)

        print N1, model.m
        m_vals.append(model.m)
        models.append(model)
        for a_indx, algorithm in enumerate(algorithms):
            for s in xrange(simulations):
                regret[a_indx, m_indx, s] = algorithm.run(T, model)

    return m_vals, regret, models
Esempio n. 3
0
 def setUp(self):
     """Build the same confounded model two ways for later comparison."""
     num_vars, num_treated = 5, 2
     q = (.1, .3, .4, .7)
     pZ = .2
     pY = np.asanyarray([[.2, .8], [.3, .9]])
     self.model1 = ParallelConfounded.create(num_vars, num_treated, pZ, pY, q)
     self.model2 = ScaleableParallelConfounded(q, pZ, pY, num_treated,
                                               num_vars - num_treated)
     self.N1 = num_treated
     self.N2 = num_vars - num_treated
Esempio n. 4
0
def regret_vs_m_general(algorithms,
                        N1_vals,
                        N,
                        T,
                        pz,
                        pY,
                        q,
                        epsilon,
                        simulations=1000):
    m_vals = []
    regret = np.zeros((len(algorithms), len(N1_vals), simulations))
    for m_indx, N1 in enumerate(N1_vals):
        model = ScaleableParallelConfounded(q, pz, pY, N1, N - N1)
        #model = ParallelConfounded.create(N,N1,pz,pY,q,epsilon)
        #model.make_ith_arm_epsilon_best(epsilon,0)
        print N1
        m_vals.append(model.m)
        for a_indx, algorithm in enumerate(algorithms):
            for s in xrange(simulations):
                regret[a_indx, m_indx, s] = algorithm.run(T, model)

    return m_vals, regret
Esempio n. 5
0
 def test_scalable_confounded(self):
     """Sampled frequencies must be consistent with the model's probabilities."""
     num_untreated = self.N - self.N1
     scaled = ScaleableParallelConfounded(self.q, self.pz, self.pY,
                                          self.N1, num_untreated)
     self.assert_samples_consistent_probabilities(scaled, 50000)
Esempio n. 6
0
    def run(self, T, model):
        """Baseline policy: pick one arm uniformly at random.

        Ignores the horizon T, records the choice in self.best_action,
        and returns its simple regret (best expected reward minus the
        chosen arm's expected reward).
        """
        self.best_action = np.random.randint(0, model.K)
        rewards = model.expected_rewards
        return max(rewards) - rewards[self.best_action]


if __name__ == "__main__":

    N = 5
    N1 = 1
    pz = .1
    pz = .2
    q = (.1, .9, .2, .8)
    pY = np.asarray([[.4, .4], [.7, .7]])
    epsilon = .1
    model = ScaleableParallelConfounded(q, pz, pY, N1, N - N1)
    alg = ThompsonSampling()
    alg2 = AlphaUCB(2)
    import time
    start = time.time()
    alg.run(1000, model)
    end = time.time()
    print end - start

    #model = ParallelConfoundedNoZAction.create(N,N1,pz,q,epsilon)
    #model.make_ith_arm_epsilon_best(epsilon,0)

    #alg.run(200,model)

#    sims = 1000
#    pulls = np.zeros(model.K,dtype=int)